Re: [RFC PATCH v5 2/4] target/riscv: smstateen check for h/senvcfg

2022-06-16 Thread Alistair Francis
On Sat, Jun 4, 2022 at 2:16 AM Mayuresh Chitale
 wrote:
>
> Accesses to henvcfg, henvcfgh and senvcfg are allowed
> only if corresponding bit in mstateen0/hstateen0 is
> enabled. Otherwise an illegal instruction trap is
> generated.
>
> Signed-off-by: Mayuresh Chitale 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/csr.c | 84 ++
>  1 file changed, 78 insertions(+), 6 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 324fefce59..ae91ae1f7e 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -39,6 +39,37 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations 
> *ops)
>  }
>
>  /* Predicates */
> +static RISCVException smstateen_acc_ok(CPURISCVState *env, int mode, int bit)
> +{
> +CPUState *cs = env_cpu(env);
> +RISCVCPU *cpu = RISCV_CPU(cs);
> +bool virt = riscv_cpu_virt_enabled(env);
> +
> +if (!cpu->cfg.ext_smstateen) {
> +return RISCV_EXCP_NONE;
> +}
> +
> +#if !defined(CONFIG_USER_ONLY)
> +if (!(env->mstateen[0] & 1UL << bit)) {
> +return RISCV_EXCP_ILLEGAL_INST;
> +}
> +
> +if (virt) {
> +if (!(env->hstateen[0] & 1UL << bit)) {
> +return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> +}
> +}
> +
> +if (mode == PRV_U) {
> +if (!(env->sstateen[0] & 1UL << bit)) {
> +return RISCV_EXCP_ILLEGAL_INST;
> +}
> +}
> +#endif
> +
> +return RISCV_EXCP_NONE;
> +}
> +
>  static RISCVException fs(CPURISCVState *env, int csrno)
>  {
>  #if !defined(CONFIG_USER_ONLY)
> @@ -1557,6 +1588,13 @@ static RISCVException write_menvcfgh(CPURISCVState 
> *env, int csrno,
>  static RISCVException read_senvcfg(CPURISCVState *env, int csrno,
>   target_ulong *val)
>  {
> +RISCVException ret;
> +
> +ret = smstateen_acc_ok(env, PRV_S, SMSTATEEN0_HSENVCFG);
> +if (ret != RISCV_EXCP_NONE) {
> +return ret;
> +}
> +
>  *val = env->senvcfg;
>  return RISCV_EXCP_NONE;
>  }
> @@ -1565,15 +1603,27 @@ static RISCVException write_senvcfg(CPURISCVState 
> *env, int csrno,
>target_ulong val)
>  {
>  uint64_t mask = SENVCFG_FIOM | SENVCFG_CBIE | SENVCFG_CBCFE | 
> SENVCFG_CBZE;
> +RISCVException ret;
>
> -env->senvcfg = (env->senvcfg & ~mask) | (val & mask);
> +ret = smstateen_acc_ok(env, PRV_S, SMSTATEEN0_HSENVCFG);
> +if (ret != RISCV_EXCP_NONE) {
> +return ret;
> +}
>
> +env->senvcfg = (env->senvcfg & ~mask) | (val & mask);
>  return RISCV_EXCP_NONE;
>  }
>
>  static RISCVException read_henvcfg(CPURISCVState *env, int csrno,
>   target_ulong *val)
>  {
> +RISCVException ret;
> +
> +ret = smstateen_acc_ok(env, PRV_S, SMSTATEEN0_HSENVCFG);
> +if (ret != RISCV_EXCP_NONE) {
> +return ret;
> +}
> +
>  *val = env->henvcfg;
>  return RISCV_EXCP_NONE;
>  }
> @@ -1582,6 +1632,12 @@ static RISCVException write_henvcfg(CPURISCVState 
> *env, int csrno,
>target_ulong val)
>  {
>  uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | 
> HENVCFG_CBZE;
> +RISCVException ret;
> +
> +ret = smstateen_acc_ok(env, PRV_S, SMSTATEEN0_HSENVCFG);
> +if (ret != RISCV_EXCP_NONE) {
> +return ret;
> +}
>
>  if (riscv_cpu_mxl(env) == MXL_RV64) {
>  mask |= HENVCFG_PBMTE | HENVCFG_STCE;
> @@ -1595,6 +1651,13 @@ static RISCVException write_henvcfg(CPURISCVState 
> *env, int csrno,
>  static RISCVException read_henvcfgh(CPURISCVState *env, int csrno,
>   target_ulong *val)
>  {
> +RISCVException ret;
> +
> +ret = smstateen_acc_ok(env, PRV_S, SMSTATEEN0_HSENVCFG);
> +if (ret != RISCV_EXCP_NONE) {
> +return ret;
> +}
> +
>  *val = env->henvcfg >> 32;
>  return RISCV_EXCP_NONE;
>  }
> @@ -1604,9 +1667,14 @@ static RISCVException write_henvcfgh(CPURISCVState 
> *env, int csrno,
>  {
>  uint64_t mask = HENVCFG_PBMTE | HENVCFG_STCE;
>  uint64_t valh = (uint64_t)val << 32;
> +RISCVException ret;
>
> -env->henvcfg = (env->henvcfg & ~mask) | (valh & mask);
> +ret = smstateen_acc_ok(env, PRV_S, SMSTATEEN0_HSENVCFG);
> +if (ret != RISCV_EXCP_NONE) {
> +return ret;
> +}
>
> +env->henvcfg = (env->henvcfg & ~mask) | (valh & mask);
>  return RISCV_EXCP_NONE;
>  }
>
> @@ -1628,7 +1696,8 @@ static RISCVException write_mstateen(CPURISCVState 
> *env, int csrno,
>   target_ulong new_val)
>  {
>  uint64_t *reg;
> -uint64_t wr_mask = 1UL << SMSTATEEN_STATEN;
> +uint64_t wr_mask = (1UL << SMSTATEEN_STATEN) |
> +   (1UL << SMSTATEEN0_HSENVCFG);
>
>  reg = &env->mstateen[csrno - CSR_MSTATEEN0];
>  write_smstateen(env, reg, wr_mask, new_val);
> @@ -1649,7 +1718,8 @@ static RISCVException write_mstateenh(CPURISCVState 
> *en

Re: [RFC PATCH v5 3/4] target/riscv: smstateen check for fcsr

2022-06-16 Thread Alistair Francis
On Sat, Jun 4, 2022 at 2:08 AM Mayuresh Chitale
 wrote:
>
> If smstateen is implemented and sstateen0.fcsr is clear
> then the floating point operations must return illegal
> instruction exception.
>
> Signed-off-by: Mayuresh Chitale 
> ---
>  target/riscv/csr.c | 24 
>  1 file changed, 24 insertions(+)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index ae91ae1f7e..8bbbed38ff 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -77,6 +77,10 @@ static RISCVException fs(CPURISCVState *env, int csrno)
>  !RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) {
>  return RISCV_EXCP_ILLEGAL_INST;
>  }
> +
> +if (!env->debugger && !riscv_cpu_fp_enabled(env)) {
> +return smstateen_acc_ok(env, PRV_U, SMSTATEEN0_FCSR);
> +}

This only checks access to the CSRs. Shouldn't we also be throwing
errors if any instruction operates on an x register?

>  #endif
>  return RISCV_EXCP_NONE;
>  }
> @@ -1700,6 +1704,10 @@ static RISCVException write_mstateen(CPURISCVState 
> *env, int csrno,
> (1UL << SMSTATEEN0_HSENVCFG);
>
>  reg = &env->mstateen[csrno - CSR_MSTATEEN0];
> +if (riscv_has_ext(env, RVF)) {
> +wr_mask |= 1UL << SMSTATEEN0_FCSR;
> +}

This doesn't look right.

"Whenever misa.F = 1, bit 1 of mstateen0 is read-only zero". Shouldn't
that mean we don't allow writes if we have the RVF extension?

Alistair

> +
>  write_smstateen(env, reg, wr_mask, new_val);
>
>  return RISCV_EXCP_NONE;
> @@ -1724,6 +1732,10 @@ static RISCVException write_mstateenh(CPURISCVState 
> *env, int csrno,
>  reg = &env->mstateen[csrno - CSR_MSTATEEN0H];
>  val = (uint64_t)new_val << 32;
>  val |= *reg & 0x;
> +if (riscv_has_ext(env, RVF)) {
> +wr_mask |= 1UL << SMSTATEEN0_FCSR;
> +}
> +
>  write_smstateen(env, reg, wr_mask, val);
>
>  return RISCV_EXCP_NONE;
> @@ -1745,6 +1757,10 @@ static RISCVException write_hstateen(CPURISCVState 
> *env, int csrno,
> (1UL << SMSTATEEN0_HSENVCFG);
>  int index = csrno - CSR_HSTATEEN0;
>
> +if (riscv_has_ext(env, RVF)) {
> +wr_mask |= 1UL << SMSTATEEN0_FCSR;
> +}
> +
>  reg = &env->hstateen[index];
>  wr_mask &= env->mstateen[index];
>  write_smstateen(env, reg, wr_mask, new_val);
> @@ -1769,6 +1785,10 @@ static RISCVException write_hstateenh(CPURISCVState 
> *env, int csrno,
>  uint64_t wr_mask = (1UL << SMSTATEEN_STATEN) |
> (1UL << SMSTATEEN0_HSENVCFG);
>
> +if (riscv_has_ext(env, RVF)) {
> +wr_mask |= 1UL << SMSTATEEN0_FCSR;
> +}
> +
>  reg = &env->hstateen[index];
>  val = (uint64_t)new_val << 32;
>  val |= *reg & 0x;
> @@ -1794,6 +1814,10 @@ static RISCVException write_sstateen(CPURISCVState 
> *env, int csrno,
>  int index = csrno - CSR_SSTATEEN0;
>  bool virt = riscv_cpu_virt_enabled(env);
>
> +if (riscv_has_ext(env, RVF)) {
> +wr_mask |= 1UL << SMSTATEEN0_FCSR;
> +}
> +
>  reg = &env->sstateen[index];
>  if (virt) {
>  wr_mask &= env->mstateen[index];
> --
> 2.25.1
>
>



Re: [RFC PATCH v5 4/4] target/riscv: smstateen check for AIA/IMSIC

2022-06-16 Thread Alistair Francis
On Sat, Jun 4, 2022 at 2:15 AM Mayuresh Chitale
 wrote:
>
> If smstateen is implemented then accesses to AIA
> registers CSRS, IMSIC CSRs and other IMSIC registers
> is controlled by setting of corresponding bits in
> mstateen/hstateen registers. Otherwise an illegal
> instruction trap or virtual instruction trap is
> generated.
>
> Signed-off-by: Mayuresh Chitale 
> ---
>  target/riscv/csr.c | 253 -
>  1 file changed, 248 insertions(+), 5 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 8bbbed38ff..213b3c17ff 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -39,6 +39,7 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops)
>  }
>
>  /* Predicates */
> +#if !defined(CONFIG_USER_ONLY)

This should just be in the original patch.

>  static RISCVException smstateen_acc_ok(CPURISCVState *env, int mode, int bit)
>  {
>  CPUState *cs = env_cpu(env);
> @@ -49,7 +50,6 @@ static RISCVException smstateen_acc_ok(CPURISCVState *env, 
> int mode, int bit)
>  return RISCV_EXCP_NONE;
>  }
>
> -#if !defined(CONFIG_USER_ONLY)
>  if (!(env->mstateen[0] & 1UL << bit)) {
>  return RISCV_EXCP_ILLEGAL_INST;
>  }
> @@ -65,11 +65,57 @@ static RISCVException smstateen_acc_ok(CPURISCVState 
> *env, int mode, int bit)
>  return RISCV_EXCP_ILLEGAL_INST;
>  }
>  }
> -#endif
> -
>  return RISCV_EXCP_NONE;
>  }
>
> +static RISCVException smstateen_aia_acc_ok(CPURISCVState *env, int csrno)

The spec doesn't mention the effects on AIA, it just says that some
bits are reserved. How do you know what should happen here?

Alistair

> +{
> +int bit, mode;
> +
> +switch (csrno) {
> +case CSR_SSETEIPNUM:
> +case CSR_SCLREIPNUM:
> +case CSR_SSETEIENUM:
> +case CSR_SCLREIENUM:
> +case CSR_STOPEI:
> +case CSR_VSSETEIPNUM:
> +case CSR_VSCLREIPNUM:
> +case CSR_VSSETEIENUM:
> +case CSR_VSCLREIENUM:
> +case CSR_VSTOPEI:
> +case CSR_HSTATUS:
> +mode = PRV_S;
> +bit = SMSTATEEN0_IMSIC;
> +break;
> +
> +case CSR_SIEH:
> +case CSR_SIPH:
> +case CSR_HVIPH:
> +case CSR_HVICTL:
> +case CSR_HVIPRIO1:
> +case CSR_HVIPRIO2:
> +case CSR_HVIPRIO1H:
> +case CSR_HVIPRIO2H:
> +case CSR_VSIEH:
> +case CSR_VSIPH:
> +mode = PRV_S;
> +bit = SMSTATEEN0_AIA;
> +break;
> +
> +case CSR_SISELECT:
> +case CSR_VSISELECT:
> +mode = PRV_S;
> +bit = SMSTATEEN0_SVSLCT;
> +break;
> +
> +default:
> +return RISCV_EXCP_NONE;
> +}
> +
> +return smstateen_acc_ok(env, mode, bit);
> +}
> +#endif
> +
>  static RISCVException fs(CPURISCVState *env, int csrno)
>  {
>  #if !defined(CONFIG_USER_ONLY)
> @@ -1130,6 +1176,13 @@ static int rmw_xiselect(CPURISCVState *env, int csrno, 
> target_ulong *val,
>  target_ulong new_val, target_ulong wr_mask)
>  {
>  target_ulong *iselect;
> +RISCVException ret;
> +
> +/* Check if smstateen is enabled and this access is allowed */
> +ret = smstateen_aia_acc_ok(env, csrno);
> +if (ret != RISCV_EXCP_NONE) {
> +return ret;
> +}
>
>  /* Translate CSR number for VS-mode */
>  csrno = aia_xlate_vs_csrno(env, csrno);
> @@ -1212,7 +1265,9 @@ static int rmw_xireg(CPURISCVState *env, int csrno, 
> target_ulong *val,
>  bool virt;
>  uint8_t *iprio;
>  int ret = -EINVAL;
> -target_ulong priv, isel, vgein;
> +target_ulong priv, isel, vgein = 0;
> +CPUState *cs = env_cpu(env);
> +RISCVCPU *cpu = RISCV_CPU(cs);
>
>  /* Translate CSR number for VS-mode */
>  csrno = aia_xlate_vs_csrno(env, csrno);
> @@ -1241,11 +1296,20 @@ static int rmw_xireg(CPURISCVState *env, int csrno, 
> target_ulong *val,
>  };
>
>  /* Find the selected guest interrupt file */
> -vgein = (virt) ? get_field(env->hstatus, HSTATUS_VGEIN) : 0;
> +if (virt) {
> +if (!cpu->cfg.ext_smstateen ||
> +(env->hstateen[0] & 1UL << SMSTATEEN0_IMSIC)) {
> +vgein = get_field(env->hstatus, HSTATUS_VGEIN);
> +}
> +}
>
>  if (ISELECT_IPRIO0 <= isel && isel <= ISELECT_IPRIO15) {
>  /* Local interrupt priority registers not available for VS-mode */
>  if (!virt) {
> +if (priv == PRV_S && cpu->cfg.ext_smstateen &&
> +!(env->hstateen[0] & 1UL << SMSTATEEN0_AIA)) {
> +goto done;
> +}
>  ret = rmw_iprio(riscv_cpu_mxl_bits(env),
>  isel, iprio, val, new_val, wr_mask,
>  (priv == PRV_M) ? IRQ_M_EXT : IRQ_S_EXT);
> @@ -1279,6 +1343,13 @@ static int rmw_xsetclreinum(CPURISCVState *env, int 
> csrno, target_ulong *val,
>  int ret = -EINVAL;
>  bool set, pend, virt;
>  target_ulong priv, isel, vgein, xlen, nval, wmask;
> +RISCVException excp;
> +
> +/* Check if sm

Re: [PATCH 0/4] Multiple interface support on top of Multi-FD

2022-06-16 Thread Daniel P . Berrangé
On Wed, Jun 15, 2022 at 08:14:26PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > On Fri, Jun 10, 2022 at 05:58:31PM +0530, manish.mishra wrote:
> > > 
> > > On 09/06/22 9:17 pm, Daniel P. Berrangé wrote:
> > > > On Thu, Jun 09, 2022 at 07:33:01AM +, Het Gala wrote:
> > > > > As of now, the multi-FD feature supports connection over the default 
> > > > > network
> > > > > only. This Patchset series is a Qemu side implementation of providing 
> > > > > multiple
> > > > > interfaces support for multi-FD. This enables us to fully utilize 
> > > > > dedicated or
> > > > > multiple NICs in case bonding of NICs is not possible.
> > > > > 
> > > > > 
> > > > > Introduction
> > > > > -
> > > > > Multi-FD Qemu implementation currently supports connection only on 
> > > > > the default
> > > > > network. This forbids us from advantages like:
> > > > > - Separating VM live migration traffic from the default network.
> > > 
> > > Hi Daniel,
> > > 
> > > I totally understand your concern around this approach increasing 
> > > complexity inside qemu,
> > > 
> > > when similar things can be done with NIC teaming. But we thought this 
> > > approach provides
> > > 
> > > much more flexibility to user in few cases like.
> > > 
> > > 1. We checked our customer data, almost all of the hosts had multiple NICs, 
> > > but LACP support
> > > 
> > >     in their setups was very rare. So for those cases this approach can 
> > > help in utilising multiple
> > > 
> > >     NICs as teaming is not possible there.
> > 
> > AFAIK,  LACP is not required in order to do link aggregation with Linux.
> > Traditional Linux bonding has no special NIC hardware or switch 
> > requirements,
> > so LACP is merely a "nice to have" in order to simplify some aspects.
> > 
> > IOW, migration with traffic spread across multiple NICs is already
> > possible AFAICT.
> 
> Are we sure that works with multifd?  I've seen a lot of bonding NIC
> setups which spread based on a hash of source/destination IP and port
> numbers; given that we use the same dest port and IP at the moment what
> happens in reality?  That hashing can be quite delicate for high
> bandwidth single streams.

The simplest Linux bonding mode does per-packet round-robin across 
NICs, so traffic from the collection of multifd connections should
fill up all the NICs in the bond. There are of course other modes
which may be sub-optimal for the reasons you describe. Which mode
to pick depends on the type of service traffic patterns you're
aiming to balance.

> > > > > Multi-interface with Multi-FD
> > > > > -
> > > > > Multiple-interface support over basic multi-FD has been implemented 
> > > > > in the
> > > > > patches. Advantages of this implementation are:
> > > > > - Able to separate live migration traffic from default network 
> > > > > interface by
> > > > >creating multiFD channels on ip addresses of multiple non-default 
> > > > > interfaces.
> > > > > - Can optimize the number of multi-FD channels on a particular 
> > > > > interface
> > > > >depending upon the network bandwidth limit on a particular 
> > > > > interface.
> > > > Manually assigning individual channels to different NICs is a pretty
> > > > inefficient way to optimizing traffic. Feels like you could easily get
> > > > into a situation where one NIC ends up idle while the other is busy,
> > > > especially if the traffic patterns are different. For example with
> > > > post-copy there's an extra channel for OOB async page requests, and
> > > > its far from clear that manually picking NICs per chanel upfront is
> > > > going work for that.  The kernel can continually dynamically balance
> > > > load on the fly and so do much better than any static mapping QEMU
> > > > tries to apply, especially if there are multiple distinct QEMU's
> > > > competing for bandwidth.
> > > > 
> > > Yes, Daniel current solution is only for pre-copy. As with postcopy
> > > multiFD is not yet supported but in future we can extend it for postcopy
> 
> I had been thinking about explicit selection of network device for NUMA
> use though; ideally I'd like to be able to associate a set of multifd
> threads to each NUMA node, and then associate a NIC with that set of
> threads; so that the migration happens down the NIC that's on the node
> the RAM is on.  On a really good day you'd have one NIC per top level
> NUMA node.

Now that's an interesting idea, and not one that can be dealt with
by bonding, since the network layer won't be aware of the NUMA
affinity constraints.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH 1/2] hw/pci-host/pam.c: Fully support RE^WE semantics of i440FX PAM

2022-06-16 Thread Lev Kujawski
The Programmable Attribute Registers (PAM) of QEMU's emulated i440FX
chipset now fully support the exclusive Read Enable (RE) and Write
Enable (WE) modes by forwarding reads of the applicable PAM region to
RAM and writes to the bus or vice versa, respectively.

The prior behavior for the RE case was to setup a RAM alias and mark
it read-only, but no attempt was made to forward writes to the bus,
and read-only aliases of RAM do not prevent writes. Now, pam.c creates
a ROMD region (with read-only memory backing) coupled with a memory
operation that forwards writes to the bus.

For the WE case, a RAM alias was created, but with no attempt to
forward reads to the bus. Now, pam.c creates a MMIO region that writes
directly to RAM (bypassing the PAM region) and forwards reads to the
bus.

Additional changes:
- Change the type of pam_update parameter idx to type uint8_t,
  eliminating an assert check.
- Remove the fourth PAM alias, for normal RAM-based reads and writes
  of PAM regions, saving memory and clutter in mtree output.

Tested with SeaBIOS and AMIBIOS.

Signed-off-by: Lev Kujawski 
---
 hw/pci-host/pam.c | 135 +++---
 include/hw/pci-host/pam.h |   7 +-
 2 files changed, 117 insertions(+), 25 deletions(-)

diff --git a/hw/pci-host/pam.c b/hw/pci-host/pam.c
index 454dd120db..da89ca3b50 100644
--- a/hw/pci-host/pam.c
+++ b/hw/pci-host/pam.c
@@ -28,43 +28,132 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/pci-host/pam.h"
 
+static void
+pam_rmem_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+PAMMemoryRegion * const pam = (PAMMemoryRegion *)opaque;
+
+(void)memory_region_dispatch_write(pam->pci_mr, pam->offset + addr, val,
+   size_memop(size), 
MEMTXATTRS_UNSPECIFIED);
+}
+
+static uint64_t
+pam_wmem_read(void *opaque, hwaddr addr, unsigned int size)
+{
+PAMMemoryRegion * const pam = (PAMMemoryRegion *)opaque;
+uint64_t val = (uint64_t)~0;
+
+(void)memory_region_dispatch_read(pam->pci_mr, pam->offset + addr, &val,
+  size_memop(size), 
MEMTXATTRS_UNSPECIFIED);
+
+return val;
+}
+
+static void
+pam_wmem_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+PAMMemoryRegion * const pam = (PAMMemoryRegion *)opaque;
+
+switch (size) {
+case 1:
+stb_p(pam->system_memory + addr, val);
+break;
+case 2:
+stw_le_p(pam->system_memory + addr, val);
+break;
+case 4:
+stl_le_p(pam->system_memory + addr, val);
+break;
+case 8:
+stq_le_p(pam->system_memory + addr, val);
+break;
+default:
+g_assert_not_reached();
+}
+}
+
+static const MemoryRegionOps pam_rmem_ops = {
+.write = pam_rmem_write,
+};
+
+static const MemoryRegionOps pam_wmem_ops = {
+.read = pam_wmem_read,
+.write = pam_wmem_write,
+.valid = {
+.min_access_size = 1,
+.max_access_size = 8,
+.unaligned = true,
+},
+.impl = {
+.min_access_size = 1,
+.max_access_size = 8,
+.unaligned = true,
+},
+};
+
 void init_pam(DeviceState *dev, MemoryRegion *ram_memory,
-  MemoryRegion *system_memory, MemoryRegion *pci_address_space,
-  PAMMemoryRegion *mem, uint32_t start, uint32_t size)
+  MemoryRegion *system, MemoryRegion *pci,
+  PAMMemoryRegion *pam, uint32_t start, uint32_t size)
 {
+char name[12] = "pam-splitr";
 int i;
 
-/* RAM */
-memory_region_init_alias(&mem->alias[3], OBJECT(dev), "pam-ram", 
ram_memory,
- start, size);
-/* ROM (XXX: not quite correct) */
-memory_region_init_alias(&mem->alias[1], OBJECT(dev), "pam-rom", 
ram_memory,
- start, size);
-memory_region_set_readonly(&mem->alias[1], true);
+name[10] = (start >> 14) + 17;
+name[11] = '\0';
+
+/* Forward all memory accesses to the bus.  */
+memory_region_init_alias(&pam->alias[0], OBJECT(dev), "pam-pci",
+ pci, start, size);
 
-/* XXX: should distinguish read/write cases */
-memory_region_init_alias(&mem->alias[0], OBJECT(dev), "pam-pci", 
pci_address_space,
- start, size);
-memory_region_init_alias(&mem->alias[2], OBJECT(dev), "pam-pci", 
ram_memory,
- start, size);
+/* Split modes */
+/* Forward reads to RAM, writes to the bus.  */
+memory_region_init_rom_device(&pam->alias[1], OBJECT(dev),
+  &pam_rmem_ops, pam, name, size,
+  &error_fatal);
+
+/* Forward writes to RAM, reads to the bus.  */
+name[9] = 'w';
+memory_region_init_io(&pam->alias[2], OBJECT(dev), &pam_wmem_ops,
+  pam, name, size);
 
 memory_region_transaction_begin();
-for (i = 0; i < 4; ++i) {
-memory

[PATCH 2/2] tests/qtest/i440fx-test.c: Enable full test of i440FX PAM operation

2022-06-16 Thread Lev Kujawski
With the prior patch in this series adding support for RE^WE PAM
semantics, the '#ifndef BROKEN' segments of test_i440fx_pam can now be
enabled.

Additionally:
- Verify that changing attributes does not affect the initial contents
  of the PAM region;
- Verify that that the first new mask is written before switching
  attributes;
- Switch back to PAM_RE after PAM_WE to read original contents;
- Tighten logic of the !WE write test because we know what the
  original contents were; and
- Write the last mask before testing for it.

Signed-off-by: Lev Kujawski 
---
 tests/qtest/i440fx-test.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tests/qtest/i440fx-test.c b/tests/qtest/i440fx-test.c
index 6d7d4d8d8f..073a16bbed 100644
--- a/tests/qtest/i440fx-test.c
+++ b/tests/qtest/i440fx-test.c
@@ -236,33 +236,34 @@ static void test_i440fx_pam(gconstpointer opaque)
 
 /* Switch to WE for the area */
 pam_set(dev, i, PAM_RE | PAM_WE);
+/* Verify the RAM is still all zeros */
+g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0));
 /* Write out a non-zero mask to the full area */
 write_area(pam_area[i].start, pam_area[i].end, 0x42);
-
-#ifndef BROKEN
-/* QEMU only supports a limited form of PAM */
+/* Verify the area contains the new mask */
+g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x42));
 
 /* Switch to !RE for the area */
 pam_set(dev, i, PAM_WE);
 /* Verify the area is not our mask */
 g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x42));
-#endif
 
-/* Verify the area is our new mask */
+/* Switch to !WE for the area */
+pam_set(dev, i, PAM_RE);
+/* Verify the area is once again our mask */
 g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x42));
 
 /* Write out a new mask */
 write_area(pam_area[i].start, pam_area[i].end, 0x82);
 
-#ifndef BROKEN
-/* QEMU only supports a limited form of PAM */
-
-/* Verify the area is not our mask */
-g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82));
+/* Verify the area is not the new mask */
+g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x42));
 
 /* Switch to RE for the area */
 pam_set(dev, i, PAM_RE | PAM_WE);
-#endif
+/* Write out a new mask again */
+write_area(pam_area[i].start, pam_area[i].end, 0x82);
+
 /* Verify the area is our new mask */
 g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x82));
 
-- 
2.34.1




Re: [PATCH 0/4] Multiple interface support on top of Multi-FD

2022-06-16 Thread Daniel P . Berrangé
On Wed, Jun 15, 2022 at 05:43:28PM +0100, Daniel P. Berrangé wrote:
> On Fri, Jun 10, 2022 at 05:58:31PM +0530, manish.mishra wrote:
> > 
> > On 09/06/22 9:17 pm, Daniel P. Berrangé wrote:
> > > On Thu, Jun 09, 2022 at 07:33:01AM +, Het Gala wrote:
> > > > As of now, the multi-FD feature supports connection over the default 
> > > > network
> > > > only. This Patchset series is a Qemu side implementation of providing 
> > > > multiple
> > > > interfaces support for multi-FD. This enables us to fully utilize 
> > > > dedicated or
> > > > multiple NICs in case bonding of NICs is not possible.
> > > > 
> > > > 
> > > > Introduction
> > > > -
> > > > Multi-FD Qemu implementation currently supports connection only on the 
> > > > default
> > > > network. This forbids us from advantages like:
> > > > - Separating VM live migration traffic from the default network.
> > 
> > Hi Daniel,
> > 
> > I totally understand your concern around this approach increasing complexity 
> > inside qemu,
> > 
> > when similar things can be done with NIC teaming. But we thought this 
> > approach provides
> > 
> > much more flexibility to user in few cases like.
> > 
> > 1. We checked our customer data, almost all of the hosts had multiple NICs, 
> > but LACP support
> > 
> >     in their setups was very rare. So for those cases this approach can 
> > help in utilising multiple
> > 
> >     NICs as teaming is not possible there.
> 
> AFAIK,  LACP is not required in order to do link aggregation with Linux.
> Traditional Linux bonding has no special NIC hardware or switch requirements,
> so LACP is merely a "nice to have" in order to simplify some aspects.
> 
> IOW, migration with traffic spread across multiple NICs is already
> possible AFAICT.
> 
> I can understand that some people may not have actually configured
> bonding on their hosts, but it is not unreasonable to request that
> they do so, if they want to take advantage of aggregated bandwidth.
> 
> It has the further benefit that it will be fault tolerant. With
> this proposal if any single NIC has a problem, the whole migration
> will get stuck. With kernel level bonding, if any single NIC has
> a problem, it'll get offlined by the kernel and migration will
> continue to  work across remaining active NICs.
> 
> > 2. We have seen requests recently to separate out traffic of storage, VM 
> > network, migration
> > 
> >     over different vswitch which can be backed by 1 or more NICs as this 
> > give better
> > 
> >     predictability and assurance. So host with multiple ips/vswitches can 
> > be very common
> > 
> >     environment. In this kind of environment this approach gives per vm or 
> > migration level
> > 
> >     flexibility, like for critical VM we can still use bandwidth from all 
> > available vswitch/interface
> > 
> >     but for normal VM they can keep live migration only on dedicated NICs 
> > without changing
> > 
> >     complete host network topology.
> > 
> >     At final we want it to be something like this [, 
> > , ]
> > 
> >     to provide bandwidth_control per interface.
> 
> Again, it is already possible to separate migration traffic from storage
> traffic, from other network traffic. The target IP given will influence
> which NIC is used based on routing table and I know this is already
> done widely with OpenStack deployments.

Actually I should clarify this is only practical if the two NICs are
using different IP subnets, otherwise routing rules are not viable.
So needing to set source IP would be needed to select between a pair
of NICs on the same IP subnet.

Previous usage I've seen has always setup fully distinct IP subnets
for generic vs storage vs migration network traffic.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: New "IndustryStandard" fw_cfg?

2022-06-16 Thread Gerd Hoffmann
  Hi,

> After re-read and re-think, I think the problem is better to state as: we
> need an interface for QEMU to tell OVMF how much memory it needs to accept,
> from [Minimum to All]. So for the case that user wants to boot a
> partially-enabled confidential VM (like current Linux TDX and SNP guest),
> user needs to specify from QEMU to tell OVMF to accept all the memory.

Asking the user to manually configure stuff sucks, that's why I think
it makes sense to let firmware and guest negotiate this automatically.

That doesn't work today though, so we will need some config option
indeed.

The proposal in the parallel thread is to just accept all low memory
(below 4G) unconditionally.  So maybe it is enough to have:

  * accept all memory below 4G
  * accept all memory

Possibly we need:

  * accept all memory below 4G
  * accept all memory below 4G, plus x GB of high memory.
  * accept all memory

In any case the config option should be designed in a way that we can
add a 'automatic' choice later, i.e. we can have ...

  * automatic (default)
  * accept all memory below 4G
  * accept all memory

... once the automatic negotiation is available.

take care,
  Gerd




[PULL 03/21] qmp: add filtering of statistics by target vCPU

2022-06-16 Thread Paolo Bonzini
Introduce a simple filtering of statistics, that allows to retrieve
statistics for a subset of the guest vCPUs.  This will be used for
example by the HMP monitor, in order to retrieve the statistics
for the currently selected CPU.

Example:
{ "execute": "query-stats",
  "arguments": {
"target": "vcpu",
"vcpus": [ "/machine/unattached/device[2]",
   "/machine/unattached/device[4]" ] } }

Extracted from a patch by Mark Kanda.

Reviewed-by: Markus Armbruster 
Signed-off-by: Paolo Bonzini 
---
 accel/kvm/kvm-all.c |  9 +++--
 include/monitor/stats.h | 11 ++-
 monitor/qmp-cmds.c  | 34 +-
 qapi/stats.json | 24 +++-
 4 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 7cc9e33bab..547de842fd 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2311,7 +2311,8 @@ bool kvm_dirty_ring_enabled(void)
 return kvm_state->kvm_dirty_ring_size ? true : false;
 }
 
-static void query_stats_cb(StatsResultList **result, StatsTarget target, Error 
**errp);
+static void query_stats_cb(StatsResultList **result, StatsTarget target,
+   strList *targets, Error **errp);
 static void query_stats_schemas_cb(StatsSchemaList **result, Error **errp);
 
 static int kvm_init(MachineState *ms)
@@ -4038,7 +4039,8 @@ static void query_stats_schema_vcpu(CPUState *cpu, 
run_on_cpu_data data)
 close(stats_fd);
 }
 
-static void query_stats_cb(StatsResultList **result, StatsTarget target, Error 
**errp)
+static void query_stats_cb(StatsResultList **result, StatsTarget target,
+   strList *targets, Error **errp)
 {
 KVMState *s = kvm_state;
 CPUState *cpu;
@@ -4062,6 +4064,9 @@ static void query_stats_cb(StatsResultList **result, 
StatsTarget target, Error *
 stats_args.result.stats = result;
 stats_args.errp = errp;
 CPU_FOREACH(cpu) {
+if (!apply_str_list_filter(cpu->parent_obj.canonical_path, 
targets)) {
+continue;
+}
 run_on_cpu(cpu, query_stats_vcpu, 
RUN_ON_CPU_HOST_PTR(&stats_args));
 }
 break;
diff --git a/include/monitor/stats.h b/include/monitor/stats.h
index 912eeadb2f..8c50feeaa9 100644
--- a/include/monitor/stats.h
+++ b/include/monitor/stats.h
@@ -11,7 +11,7 @@
 #include "qapi/qapi-types-stats.h"
 
 typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target,
-  Error **errp);
+  strList *targets, Error **errp);
 typedef void SchemaRetrieveFunc(StatsSchemaList **result, Error **errp);
 
 /*
@@ -31,4 +31,13 @@ void add_stats_entry(StatsResultList **, StatsProvider, 
const char *id,
 void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget,
   StatsSchemaValueList *);
 
+/*
+ * True if a string matches the filter passed to the stats_fn callback,
+ * false otherwise.
+ *
+ * Note that an empty list means no filtering, i.e. all strings will
+ * return true.
+ */
+bool apply_str_list_filter(const char *string, strList *list);
+
 #endif /* STATS_H */
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index a6ac8d7473..5f8f1e620b 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -468,9 +468,26 @@ static bool invoke_stats_cb(StatsCallbacks *entry,
 StatsFilter *filter,
 Error **errp)
 {
+strList *targets = NULL;
 ERRP_GUARD();
 
-entry->stats_cb(stats_results, filter->target, errp);
+switch (filter->target) {
+case STATS_TARGET_VM:
+break;
+case STATS_TARGET_VCPU:
+if (filter->u.vcpu.has_vcpus) {
+if (!filter->u.vcpu.vcpus) {
+/* No targets allowed?  Return no statistics.  */
+return true;
+}
+targets = filter->u.vcpu.vcpus;
+}
+break;
+default:
+abort();
+}
+
+entry->stats_cb(stats_results, filter->target, targets, errp);
 if (*errp) {
 qapi_free_StatsResultList(*stats_results);
 *stats_results = NULL;
@@ -536,3 +553,18 @@ void add_stats_schema(StatsSchemaList **schema_results,
 entry->stats = stats_list;
 QAPI_LIST_PREPEND(*schema_results, entry);
 }
+
+bool apply_str_list_filter(const char *string, strList *list)
+{
+strList *str_list = NULL;
+
+if (!list) {
+return true;
+}
+for (str_list = list; str_list; str_list = str_list->next) {
+if (g_str_equal(string, str_list->value)) {
+return true;
+}
+}
+return false;
+}
diff --git a/qapi/stats.json b/qapi/stats.json
index df7c4d886c..8c9abb57f1 100644
--- a/qapi/stats.json
+++ b/qapi/stats.json
@@ -70,15 +70,29 @@
   'data': [ 'vm', 'vcpu' ] }
 
 ##
-# @StatsFilter:
+# @StatsVCPUFilter:
 #
-# The arguments to the query-stats command; specifies a target for which to
-# request 

[PATCH] tests/vm: do not specify -bios option

2022-06-16 Thread Paolo Bonzini
When running from the build tree, the executable is able to find
the BIOS on its own; when running from the source tree, a firmware
blob should already be installed and there is no guarantee that
the one in the source tree works with the QEMU that is being used for
the installation.

Just remove the -bios option, since it is unnecessary and in fact
there are other x86 VM tests that do not bother specifying it.

Signed-off-by: Paolo Bonzini 
---
 tests/vm/fedora  | 1 -
 tests/vm/freebsd | 1 -
 tests/vm/netbsd  | 1 -
 tests/vm/openbsd | 1 -
 4 files changed, 4 deletions(-)

diff --git a/tests/vm/fedora b/tests/vm/fedora
index 92b78d6e2c..12eca919a0 100755
--- a/tests/vm/fedora
+++ b/tests/vm/fedora
@@ -79,7 +79,6 @@ class FedoraVM(basevm.BaseVM):
 self.exec_qemu_img("create", "-f", "qcow2", img_tmp, self.size)
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-device", "VGA",
 "-cdrom", iso
diff --git a/tests/vm/freebsd b/tests/vm/freebsd
index 805db759d6..cd1fabde52 100755
--- a/tests/vm/freebsd
+++ b/tests/vm/freebsd
@@ -95,7 +95,6 @@ class FreeBSDVM(basevm.BaseVM):
 
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-device", "VGA",
 "-cdrom", iso
diff --git a/tests/vm/netbsd b/tests/vm/netbsd
index 45aa9a7fda..aa883ec23c 100755
--- a/tests/vm/netbsd
+++ b/tests/vm/netbsd
@@ -86,7 +86,6 @@ class NetBSDVM(basevm.BaseVM):
 
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-cdrom", iso
 ])
diff --git a/tests/vm/openbsd b/tests/vm/openbsd
index 13c8254214..6f1b6f5b98 100755
--- a/tests/vm/openbsd
+++ b/tests/vm/openbsd
@@ -82,7 +82,6 @@ class OpenBSDVM(basevm.BaseVM):
 
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-device", "VGA",
 "-cdrom", iso
-- 
2.36.1




[PULL 04/21] cutils: add functions for IEC and SI prefixes

2022-06-16 Thread Paolo Bonzini
Extract the knowledge of IEC and SI prefixes out of size_to_str and
freq_to_str, so that it can be reused when printing statistics.

Signed-off-by: Paolo Bonzini 
---
 include/qemu/cutils.h| 18 ++
 tests/unit/test-cutils.c | 52 
 util/cutils.c| 34 +++---
 3 files changed, 95 insertions(+), 9 deletions(-)

diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 40e10e19a7..d3e532b64c 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -1,6 +1,24 @@
 #ifndef QEMU_CUTILS_H
 #define QEMU_CUTILS_H
 
+/*
+ * si_prefix:
+ * @exp10: exponent of 10, a multiple of 3 between -18 and 18 inclusive.
+ *
+ * Return an SI prefix (n, u, m, K, M, etc.) corresponding
+ * to the given exponent of 10.
+ */
+const char *si_prefix(unsigned int exp10);
+
+/*
+ * iec_binary_prefix:
+ * @exp2: exponent of 2, a multiple of 10 between 0 and 60 inclusive.
+ *
+ * Return an IEC binary prefix (Ki, Mi, etc.) corresponding
+ * to the given exponent of 2.
+ */
+const char *iec_binary_prefix(unsigned int exp2);
+
 /**
  * pstrcpy:
  * @buf: buffer to copy string into
diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 98671f1ac3..f5b780f012 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -2450,6 +2450,50 @@ static void test_qemu_strtosz_metric(void)
 g_assert(endptr == str + 7);
 }
 
+static void test_freq_to_str(void)
+{
+g_assert_cmpstr(freq_to_str(999), ==, "999 Hz");
+g_assert_cmpstr(freq_to_str(1000), ==, "1 KHz");
+g_assert_cmpstr(freq_to_str(1010), ==, "1.01 KHz");
+}
+
+static void test_size_to_str(void)
+{
+g_assert_cmpstr(size_to_str(0), ==, "0 B");
+g_assert_cmpstr(size_to_str(1), ==, "1 B");
+g_assert_cmpstr(size_to_str(1016), ==, "0.992 KiB");
+g_assert_cmpstr(size_to_str(1024), ==, "1 KiB");
+g_assert_cmpstr(size_to_str(512ull << 20), ==, "512 MiB");
+}
+
+static void test_iec_binary_prefix(void)
+{
+g_assert_cmpstr(iec_binary_prefix(0), ==, "");
+g_assert_cmpstr(iec_binary_prefix(10), ==, "Ki");
+g_assert_cmpstr(iec_binary_prefix(20), ==, "Mi");
+g_assert_cmpstr(iec_binary_prefix(30), ==, "Gi");
+g_assert_cmpstr(iec_binary_prefix(40), ==, "Ti");
+g_assert_cmpstr(iec_binary_prefix(50), ==, "Pi");
+g_assert_cmpstr(iec_binary_prefix(60), ==, "Ei");
+}
+
+static void test_si_prefix(void)
+{
+g_assert_cmpstr(si_prefix(-18), ==, "a");
+g_assert_cmpstr(si_prefix(-15), ==, "f");
+g_assert_cmpstr(si_prefix(-12), ==, "p");
+g_assert_cmpstr(si_prefix(-9), ==, "n");
+g_assert_cmpstr(si_prefix(-6), ==, "u");
+g_assert_cmpstr(si_prefix(-3), ==, "m");
+g_assert_cmpstr(si_prefix(0), ==, "");
+g_assert_cmpstr(si_prefix(3), ==, "K");
+g_assert_cmpstr(si_prefix(6), ==, "M");
+g_assert_cmpstr(si_prefix(9), ==, "G");
+g_assert_cmpstr(si_prefix(12), ==, "T");
+g_assert_cmpstr(si_prefix(15), ==, "P");
+g_assert_cmpstr(si_prefix(18), ==, "E");
+}
+
 int main(int argc, char **argv)
 {
 g_test_init(&argc, &argv, NULL);
@@ -2729,5 +2773,13 @@ int main(int argc, char **argv)
 g_test_add_func("/cutils/strtosz/metric",
 test_qemu_strtosz_metric);
 
+g_test_add_func("/cutils/size_to_str",
+test_size_to_str);
+g_test_add_func("/cutils/freq_to_str",
+test_freq_to_str);
+g_test_add_func("/cutils/iec_binary_prefix",
+test_iec_binary_prefix);
+g_test_add_func("/cutils/si_prefix",
+test_si_prefix);
 return g_test_run();
 }
diff --git a/util/cutils.c b/util/cutils.c
index a58bcfd80e..6d04e52907 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -872,6 +872,25 @@ int parse_debug_env(const char *name, int max, int initial)
 return debug;
 }
 
+const char *si_prefix(unsigned int exp10)
+{
+static const char *prefixes[] = {
+"a", "f", "p", "n", "u", "m", "", "K", "M", "G", "T", "P", "E"
+};
+
+exp10 += 18;
+assert(exp10 % 3 == 0 && exp10 / 3 < ARRAY_SIZE(prefixes));
+return prefixes[exp10 / 3];
+}
+
+const char *iec_binary_prefix(unsigned int exp2)
+{
+static const char *prefixes[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei" };
+
+assert(exp2 % 10 == 0 && exp2 / 10 < ARRAY_SIZE(prefixes));
+return prefixes[exp2 / 10];
+}
+
 /*
  * Return human readable string for size @val.
  * @val can be anything that uint64_t allows (no more than "16 EiB").
@@ -880,7 +899,6 @@ int parse_debug_env(const char *name, int max, int initial)
  */
 char *size_to_str(uint64_t val)
 {
-static const char *suffixes[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei" };
 uint64_t div;
 int i;
 
@@ -891,25 +909,23 @@ char *size_to_str(uint64_t val)
  * (see e41b509d68afb1f for more info)
  */
 frexp(val / (1000.0 / 1024.0), &i);
-i = (i - 1) / 10;
-div = 1ULL << (i * 10);
+i = (i - 1) / 10 * 10;
+div = 1ULL << i;
 
-return g_s

[PULL 01/21] qmp: Support for querying stats

2022-06-16 Thread Paolo Bonzini
From: Mark Kanda 

Gathering statistics is important for development, for monitoring and
for performance measurement.  There are tools such as kvm_stat that do
this and they rely on the _user_ knowing the interesting data points
rather than the tool (which can treat them as opaque).

The commands introduced in this commit provide QMP support for
querying stats; the goal is to take the capabilities of these tools
and make them available throughout the whole virtualization stack,
so that one can observe, monitor and measure virtual machines without
having shell access + root on the host that runs them.

query-stats returns a list of all stats per target type (only VM
and vCPU to start); future commits add extra options for specifying
stat names, vCPU qom paths, and providers.  All these are used by the
HMP command "info stats".  Because of the development use cases around
statistics, a good HMP interface is important.

query-stats-schemas returns a list of stats included in each target
type, with an option for specifying the provider.  The concepts in the
schema are based on the KVM binary stats' own introspection data, just
translated to QAPI.

There are two reasons to have a separate schema that is not tied to
the QAPI schema.  The first is the contents of the schemas: the new
introspection data provides different information than the QAPI data,
namely unit of measurement, how the numbers are gathered and change
(peak/instant/cumulative/histogram), and histogram bucket sizes.
There's really no reason to have this kind of metadata in the QAPI
introspection schema (except possibly for the unit of measure, but
there's a very weak justification).

Another reason is the dynamicity of the schema.  The QAPI introspection
data is very much static; and while QOM is somewhat more dynamic,
generally we consider that to be a bug rather than a feature these days.
On the other hand, the statistics that are exposed by QEMU might be
passed through from another source, such as KVM, and the disadvantages of
manually updating the QAPI schema for outweight the benefits from vetting
the statistics and filtering out anything that seems "too unstable".
Running old QEMU with new kernel is a supported use case; if old QEMU
cannot expose statistics from a new kernel, or if a kernel developer
needs to change QEMU before gathering new info from the new kernel,
then that is a poor user interface.

The framework provides a method to register callbacks for these QMP
commands.  Most of the work in fact is done by the callbacks, and a
large majority of this patch is new QAPI structs and commands.

Examples (with KVM stats):

- Query all VM stats:

{ "execute": "query-stats", "arguments" : { "target": "vm" } }

{ "return": [
 { "provider": "kvm",
   "stats": [
  { "name": "max_mmu_page_hash_collisions", "value": 0 },
  { "name": "max_mmu_rmap_size", "value": 0 },
  { "name": "nx_lpage_splits", "value": 148 },
  ... ] },
 { "provider": "xyz",
   "stats": [ ... ] }
] }

- Query all vCPU stats:

{ "execute": "query-stats", "arguments" : { "target": "vcpu" } }

{ "return": [
 { "provider": "kvm",
   "qom_path": "/machine/unattached/device[0]"
   "stats": [
  { "name": "guest_mode", "value": 0 },
  { "name": "directed_yield_successful", "value": 0 },
  { "name": "directed_yield_attempted", "value": 106 },
  ... ] },
 { "provider": "kvm",
   "qom_path": "/machine/unattached/device[1]"
   "stats": [
  { "name": "guest_mode", "value": 0 },
  { "name": "directed_yield_successful", "value": 0 },
  { "name": "directed_yield_attempted", "value": 106 },
  ... ] },
] }

- Retrieve the schemas:

{ "execute": "query-stats-schemas" }

{ "return": [
{ "provider": "kvm",
  "target": "vcpu",
  "stats": [
 { "name": "guest_mode",
   "unit": "none",
   "base": 10,
   "exponent": 0,
   "type": "instant" },
{ "name": "directed_yield_successful",
   "unit": "none",
   "base": 10,
   "exponent": 0,
   "type": "cumulative" },
... ]
},
{ "provider": "kvm",
  "target": "vm",
  "stats": [
{ "name": "max_mmu_page_hash_collisions",
   "unit": "none",
   "base": 10,
   "exponent": 0,
   "type": "peak" },
... ]
},
{ "provider": "xyz",
  "target": "vm",
  "stats": [ ... ]
}
] }

Signed-off-by: Mark Kanda 
Reviewed-by: Markus Armbruster 
Signed-off-by: Paolo Bonzini 
---
 include/monitor/stats.h |  34 +++
 monitor/qmp-cmds.c  |  95 ++
 qapi/meson.build|   1 +
 qapi/qapi-schema.json   |   1 +
 qapi/stats.json | 216 
 5 files changed, 347 insertions(+)
 create mode 100644 include/monitor/stats.h
 create mode 100644 qapi/stats.json

diff --git a/include/monitor/stats.h b/include/moni

[PULL 06/21] qmp: add filtering of statistics by provider

2022-06-16 Thread Paolo Bonzini
Allow retrieving the statistics from a specific provider only.
This can be used in the future by HMP commands such as "info
sync-profile" or "info profile".  The next patch also adds
filter-by-provider capabilities to the HMP equivalent of
query-stats, "info stats".

Example:

{ "execute": "query-stats",
  "arguments": {
"target": "vm",
"providers": [
  { "provider": "kvm" } ] } }

The QAPI is a bit more verbose than just a list of StatsProvider,
so that it can be subsequently extended with filtering of statistics
by name.

If a provider is specified more than once in the filter, each request
will be included separately in the output.

Extracted from a patch by Mark Kanda.

Reviewed-by: Markus Armbruster 
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Paolo Bonzini 
---
 accel/kvm/kvm-all.c |  3 ++-
 include/monitor/stats.h |  4 +++-
 monitor/hmp-cmds.c  |  2 +-
 monitor/qmp-cmds.c  | 41 -
 qapi/stats.json | 19 +--
 5 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 547de842fd..2e819beaeb 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2644,7 +2644,8 @@ static int kvm_init(MachineState *ms)
 }
 
 if (kvm_check_extension(kvm_state, KVM_CAP_BINARY_STATS_FD)) {
-add_stats_callbacks(query_stats_cb, query_stats_schemas_cb);
+add_stats_callbacks(STATS_PROVIDER_KVM, query_stats_cb,
+query_stats_schemas_cb);
 }
 
 return 0;
diff --git a/include/monitor/stats.h b/include/monitor/stats.h
index 8c50feeaa9..80a523dd29 100644
--- a/include/monitor/stats.h
+++ b/include/monitor/stats.h
@@ -17,10 +17,12 @@ typedef void SchemaRetrieveFunc(StatsSchemaList **result, 
Error **errp);
 /*
  * Register callbacks for the QMP query-stats command.
  *
+ * @provider: stats provider checked against QMP command arguments
  * @stats_fn: routine to query stats:
  * @schema_fn: routine to query stat schemas:
  */
-void add_stats_callbacks(StatRetrieveFunc *stats_fn,
+void add_stats_callbacks(StatsProvider provider,
+ StatRetrieveFunc *stats_fn,
  SchemaRetrieveFunc *schemas_fn);
 
 /*
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 04d5ee8fb7..9180cf1841 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -2397,7 +2397,7 @@ void hmp_info_stats(Monitor *mon, const QDict *qdict)
 goto exit_no_print;
 }
 
-schema = qmp_query_stats_schemas(&err);
+schema = qmp_query_stats_schemas(false, STATS_PROVIDER__MAX, &err);
 if (err) {
 goto exit;
 }
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 5f8f1e620b..e49ab345d7 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -445,6 +445,7 @@ HumanReadableText *qmp_x_query_irq(Error **errp)
 }
 
 typedef struct StatsCallbacks {
+StatsProvider provider;
 StatRetrieveFunc *stats_cb;
 SchemaRetrieveFunc *schemas_cb;
 QTAILQ_ENTRY(StatsCallbacks) next;
@@ -453,10 +454,12 @@ typedef struct StatsCallbacks {
 static QTAILQ_HEAD(, StatsCallbacks) stats_callbacks =
 QTAILQ_HEAD_INITIALIZER(stats_callbacks);
 
-void add_stats_callbacks(StatRetrieveFunc *stats_fn,
+void add_stats_callbacks(StatsProvider provider,
+ StatRetrieveFunc *stats_fn,
  SchemaRetrieveFunc *schemas_fn)
 {
 StatsCallbacks *entry = g_new(StatsCallbacks, 1);
+entry->provider = provider;
 entry->stats_cb = stats_fn;
 entry->schemas_cb = schemas_fn;
 
@@ -465,12 +468,18 @@ void add_stats_callbacks(StatRetrieveFunc *stats_fn,
 
 static bool invoke_stats_cb(StatsCallbacks *entry,
 StatsResultList **stats_results,
-StatsFilter *filter,
+StatsFilter *filter, StatsRequest *request,
 Error **errp)
 {
 strList *targets = NULL;
 ERRP_GUARD();
 
+if (request) {
+if (request->provider != entry->provider) {
+return true;
+}
+}
+
 switch (filter->target) {
 case STATS_TARGET_VM:
 break;
@@ -500,27 +509,41 @@ StatsResultList *qmp_query_stats(StatsFilter *filter, 
Error **errp)
 {
 StatsResultList *stats_results = NULL;
 StatsCallbacks *entry;
+StatsRequestList *request;
 
 QTAILQ_FOREACH(entry, &stats_callbacks, next) {
-if (!invoke_stats_cb(entry, &stats_results, filter, errp)) {
-break;
+if (filter->has_providers) {
+for (request = filter->providers; request; request = 
request->next) {
+if (!invoke_stats_cb(entry, &stats_results, filter,
+ request->value, errp)) {
+break;
+}
+}
+} else {
+if (!invoke_stats_cb(entry, &stats_results, filter, NULL, errp)) {
+break;
+ 

[PULL 00/21] Statistics, preconfig and cleanup patches for 2022-06-16

2022-06-16 Thread Paolo Bonzini
The following changes since commit debd0753663bc89c86f5462a53268f2e3f680f60:

  Merge tag 'pull-testing-next-140622-1' of https://github.com/stsquad/qemu 
into staging (2022-06-13 21:10:57 -0700)

are available in the Git repository at:

  https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to 76ca98b0f85222601bd449252ac71df19e0dab29:

  build: include pc-bios/ part in the ROMS variable (2022-06-15 11:12:32 +0200)


* statistics subsystem
* virtio reset cleanups
* build system cleanups
* fix Cirrus CI


Alexander Bulekov (1):
  build: fix check for -fsanitize-coverage-allowlist

Mark Kanda (3):
  qmp: Support for querying stats
  kvm: Support for querying fd-based stats
  hmp: add basic "info stats" implementation

Paolo Bonzini (16):
  qmp: add filtering of statistics by target vCPU
  cutils: add functions for IEC and SI prefixes
  qmp: add filtering of statistics by provider
  hmp: add filtering of statistics by provider
  qmp: add filtering of statistics by name
  hmp: add filtering of statistics by name
  block: add more commands to preconfig mode
  s390x: simplify virtio_ccw_reset_virtio
  virtio-mmio: stop ioeventfd on legacy reset
  virtio: stop ioeventfd on reset
  virtio-mmio: cleanup reset
  configure: update list of preserved environment variables
  configure: cleanup -fno-pie detection
  tests/vm: allow running tests in an unconfigured source tree
  meson: put cross compiler info in a separate section
  build: include pc-bios/ part in the ROMS variable

Zhenzhong Duan (1):
  q35: Enable TSEG only when G_SMRAME and TSEG_EN both enabled

 Makefile  |  12 +-
 accel/kvm/kvm-all.c   | 403 ++
 configure |  22 +--
 hmp-commands-info.hx  |  14 ++
 hmp-commands.hx   |  14 ++
 hw/pci-host/q35.c |   3 +-
 hw/s390x/virtio-ccw.c |  12 +-
 hw/virtio/virtio-bus.c|   1 +
 hw/virtio/virtio-mmio.c   |  18 +--
 hw/virtio/virtio-pci.c|   1 -
 include/monitor/hmp.h |   1 +
 include/monitor/stats.h   |  45 ++
 include/qemu/cutils.h |  18 +++
 meson.build   |  25 +--
 monitor/hmp-cmds.c| 232 ++
 monitor/qmp-cmds.c| 155 ++
 qapi/block-core.json  | 117 +-
 qapi/block-export.json|  21 ++-
 qapi/block.json   |   6 +-
 qapi/meson.build  |   1 +
 qapi/qapi-schema.json |   1 +
 qapi/stats.json   | 249 
 tests/unit/test-cutils.c  |  52 ++
 tests/vm/Makefile.include |  26 +--
 util/cutils.c |  34 ++--
 25 files changed, 1368 insertions(+), 115 deletions(-)
 create mode 100644 include/monitor/stats.h
 create mode 100644 qapi/stats.json
-- 
2.36.1




[PULL 05/21] hmp: add basic "info stats" implementation

2022-06-16 Thread Paolo Bonzini
From: Mark Kanda 

Add an HMP command to retrieve statistics collected at run-time.
The command will retrieve and print either all VM-level statistics,
or all vCPU-level statistics for the currently selected CPU.

Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Paolo Bonzini 
---
 hmp-commands-info.hx  |  13 +++
 include/monitor/hmp.h |   1 +
 monitor/hmp-cmds.c| 190 ++
 3 files changed, 204 insertions(+)

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 834bed089e..28757768f7 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -894,3 +894,16 @@ SRST
   ``info via``
 Show guest mos6522 VIA devices.
 ERST
+
+{
+.name   = "stats",
+.args_type  = "target:s",
+.params = "target",
+.help   = "show statistics; target is either vm or vcpu",
+.cmd= hmp_info_stats,
+},
+
+SRST
+  ``stats``
+Show runtime-collected statistics
+ERST
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 96d014826a..2e89a97bd6 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -133,5 +133,6 @@ void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict);
 void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict);
 void hmp_human_readable_text_helper(Monitor *mon,
 HumanReadableText *(*qmp_handler)(Error 
**));
+void hmp_info_stats(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 622c783c32..04d5ee8fb7 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -40,6 +40,7 @@
 #include "qapi/qapi-commands-pci.h"
 #include "qapi/qapi-commands-rocker.h"
 #include "qapi/qapi-commands-run-state.h"
+#include "qapi/qapi-commands-stats.h"
 #include "qapi/qapi-commands-tpm.h"
 #include "qapi/qapi-commands-ui.h"
 #include "qapi/qapi-visit-net.h"
@@ -52,6 +53,7 @@
 #include "ui/console.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "hw/core/cpu.h"
 #include "hw/intc/intc.h"
 #include "migration/snapshot.h"
 #include "migration/misc.h"
@@ -2239,3 +2241,191 @@ void hmp_info_memory_size_summary(Monitor *mon, const 
QDict *qdict)
 }
 hmp_handle_error(mon, err);
 }
+
+static void print_stats_schema_value(Monitor *mon, StatsSchemaValue *value)
+{
+const char *unit = NULL;
+monitor_printf(mon, "%s (%s%s", value->name, 
StatsType_str(value->type),
+   value->has_unit || value->exponent ? ", " : "");
+
+if (value->has_unit) {
+if (value->unit == STATS_UNIT_SECONDS) {
+unit = "s";
+} else if (value->unit == STATS_UNIT_BYTES) {
+unit = "B";
+}
+}
+
+if (unit && value->base == 10 &&
+value->exponent >= -18 && value->exponent <= 18 &&
+value->exponent % 3 == 0) {
+monitor_printf(mon, "%s", si_prefix(value->exponent));
+} else if (unit && value->base == 2 &&
+   value->exponent >= 0 && value->exponent <= 60 &&
+   value->exponent % 10 == 0) {
+
+monitor_printf(mon, "%s", iec_binary_prefix(value->exponent));
+} else if (value->exponent) {
+/* Use exponential notation and write the unit's English name */
+monitor_printf(mon, "* %d^%d%s",
+   value->base, value->exponent,
+   value->has_unit ? " " : "");
+unit = NULL;
+}
+
+if (value->has_unit) {
+monitor_printf(mon, "%s", unit ? unit : StatsUnit_str(value->unit));
+}
+
+/* Print bucket size for linear histograms */
+if (value->type == STATS_TYPE_LINEAR_HISTOGRAM && value->has_bucket_size) {
+monitor_printf(mon, ", bucket size=%d", value->bucket_size);
+}
+monitor_printf(mon, ")");
+}
+
+static StatsSchemaValueList *find_schema_value_list(
+StatsSchemaList *list, StatsProvider provider,
+StatsTarget target)
+{
+StatsSchemaList *node;
+
+for (node = list; node; node = node->next) {
+if (node->value->provider == provider &&
+node->value->target == target) {
+return node->value->stats;
+}
+}
+return NULL;
+}
+
+static void print_stats_results(Monitor *mon, StatsTarget target,
+StatsResult *result,
+StatsSchemaList *schema)
+{
+/* Find provider schema */
+StatsSchemaValueList *schema_value_list =
+find_schema_value_list(schema, result->provider, target);
+StatsList *stats_list;
+
+if (!schema_value_list) {
+monitor_printf(mon, "failed to find schema list for %s\n",
+   StatsProvider_str(result->provider));
+return;
+}
+
+monitor_printf(mon, "provider: %s\n",
+   StatsProvider_str(result->provider));
+
+for (stats_list = result->stats; stats_list;
+ stats_list = stats_list->next,
+ schema_value_list = schema_value_list->next) {
+
+ 

[PULL 15/21] configure: update list of preserved environment variables

2022-06-16 Thread Paolo Bonzini
INSTALL and LIBTOOL are not used anymore, but OBJCFLAGS is new and
was not listed.

Signed-off-by: Paolo Bonzini 
---
 configure | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/configure b/configure
index 4b12a8094c..d41c7eddff 100755
--- a/configure
+++ b/configure
@@ -2722,13 +2722,12 @@ preserve_env CC
 preserve_env CFLAGS
 preserve_env CXX
 preserve_env CXXFLAGS
-preserve_env INSTALL
 preserve_env LD
 preserve_env LDFLAGS
 preserve_env LD_LIBRARY_PATH
-preserve_env LIBTOOL
 preserve_env MAKE
 preserve_env NM
+preserve_env OBJCFLAGS
 preserve_env OBJCOPY
 preserve_env PATH
 preserve_env PKG_CONFIG
-- 
2.36.1





[PULL 07/21] hmp: add filtering of statistics by provider

2022-06-16 Thread Paolo Bonzini
Allow the user to request statistics for a single provider of interest.
Extracted from a patch by Mark Kanda.

Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Paolo Bonzini 
---
 hmp-commands-info.hx |  7 ---
 monitor/hmp-cmds.c   | 39 ---
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 28757768f7..a67040443b 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -897,9 +897,10 @@ ERST
 
 {
 .name   = "stats",
-.args_type  = "target:s",
-.params = "target",
-.help   = "show statistics; target is either vm or vcpu",
+.args_type  = "target:s,provider:s?",
+.params = "target [provider]",
+.help   = "show statistics for the given target (vm or vcpu); 
optionally filter by "
+  "provider",
 .cmd= hmp_info_stats,
 },
 
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 9180cf1841..9278439533 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -2300,6 +2300,7 @@ static StatsSchemaValueList *find_schema_value_list(
 }
 
 static void print_stats_results(Monitor *mon, StatsTarget target,
+bool show_provider,
 StatsResult *result,
 StatsSchemaList *schema)
 {
@@ -2314,8 +2315,10 @@ static void print_stats_results(Monitor *mon, 
StatsTarget target,
 return;
 }
 
-monitor_printf(mon, "provider: %s\n",
-   StatsProvider_str(result->provider));
+if (show_provider) {
+monitor_printf(mon, "provider: %s\n",
+   StatsProvider_str(result->provider));
+}
 
 for (stats_list = result->stats; stats_list;
  stats_list = stats_list->next,
@@ -2356,7 +2359,8 @@ static void print_stats_results(Monitor *mon, StatsTarget 
target,
 }
 
 /* Create the StatsFilter that is needed for an "info stats" invocation.  */
-static StatsFilter *stats_filter(StatsTarget target, int cpu_index)
+static StatsFilter *stats_filter(StatsTarget target, int cpu_index,
+ StatsProvider provider)
 {
 StatsFilter *filter = g_malloc0(sizeof(*filter));
 
@@ -2378,12 +2382,25 @@ static StatsFilter *stats_filter(StatsTarget target, 
int cpu_index)
 default:
 break;
 }
+
+if (provider == STATS_PROVIDER__MAX) {
+return filter;
+}
+
+/* "info stats" can only query either one or all the providers.  */
+filter->has_providers = true;
+filter->providers = g_new0(StatsRequestList, 1);
+filter->providers->value = g_new0(StatsRequest, 1);
+filter->providers->value->provider = provider;
 return filter;
 }
 
 void hmp_info_stats(Monitor *mon, const QDict *qdict)
 {
 const char *target_str = qdict_get_str(qdict, "target");
+const char *provider_str = qdict_get_try_str(qdict, "provider");
+
+StatsProvider provider = STATS_PROVIDER__MAX;
 StatsTarget target;
 Error *err = NULL;
 g_autoptr(StatsSchemaList) schema = NULL;
@@ -2396,19 +2413,27 @@ void hmp_info_stats(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "invalid stats target %s\n", target_str);
 goto exit_no_print;
 }
+if (provider_str) {
+provider = qapi_enum_parse(&StatsProvider_lookup, provider_str, -1, 
&err);
+if (err) {
+monitor_printf(mon, "invalid stats provider %s\n", provider_str);
+goto exit_no_print;
+}
+}
 
-schema = qmp_query_stats_schemas(false, STATS_PROVIDER__MAX, &err);
+schema = qmp_query_stats_schemas(provider_str ? true : false,
+ provider, &err);
 if (err) {
 goto exit;
 }
 
 switch (target) {
 case STATS_TARGET_VM:
-filter = stats_filter(target, -1);
+filter = stats_filter(target, -1, provider);
 break;
 case STATS_TARGET_VCPU: {}
 int cpu_index = monitor_get_cpu_index(mon);
-filter = stats_filter(target, cpu_index);
+filter = stats_filter(target, cpu_index, provider);
 break;
 default:
 abort();
@@ -2419,7 +2444,7 @@ void hmp_info_stats(Monitor *mon, const QDict *qdict)
 goto exit;
 }
 for (entry = stats; entry; entry = entry->next) {
-print_stats_results(mon, target, entry->value, schema);
+print_stats_results(mon, target, provider_str == NULL, entry->value, 
schema);
 }
 
 exit:
-- 
2.36.1





[PULL 09/21] hmp: add filtering of statistics by name

2022-06-16 Thread Paolo Bonzini
Allow the user to request only a specific subset of statistics.
This can be useful when working on a feature or optimization that is
known to affect that statistic.

Example:

   (qemu) info stats vcpu halt_poll_fail_ns
   provider: kvm
   halt_poll_fail_ns (cumulative, ns): 0

In case multiple providers have the same statistic, the provider can be
specified too:

   (qemu) info stats vcpu halt_poll_fail_ns kvm
   provider: kvm
   halt_poll_fail_ns (cumulative, ns): 0

Extracted from a patch by Mark Kanda.

Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Paolo Bonzini 
---
 hmp-commands-info.hx |  8 
 monitor/hmp-cmds.c   | 35 ++-
 2 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index a67040443b..3ffa24bd67 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -897,10 +897,10 @@ ERST
 
 {
 .name   = "stats",
-.args_type  = "target:s,provider:s?",
-.params = "target [provider]",
-.help   = "show statistics for the given target (vm or vcpu); 
optionally filter by "
-  "provider",
+.args_type  = "target:s,names:s?,provider:s?",
+.params = "target [names] [provider]",
+.help   = "show statistics for the given target (vm or vcpu); 
optionally filter by "
+  "name (comma-separated list, or * for all) and provider",
 .cmd= hmp_info_stats,
 },
 
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 9278439533..47a27326ee 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -2359,10 +2359,12 @@ static void print_stats_results(Monitor *mon, 
StatsTarget target,
 }
 
 /* Create the StatsFilter that is needed for an "info stats" invocation.  */
-static StatsFilter *stats_filter(StatsTarget target, int cpu_index,
- StatsProvider provider)
+static StatsFilter *stats_filter(StatsTarget target, const char *names,
+ int cpu_index, StatsProvider provider)
 {
 StatsFilter *filter = g_malloc0(sizeof(*filter));
+StatsProvider provider_idx;
+StatsRequestList *request_list = NULL;
 
 filter->target = target;
 switch (target) {
@@ -2383,15 +2385,29 @@ static StatsFilter *stats_filter(StatsTarget target, 
int cpu_index,
 break;
 }
 
-if (provider == STATS_PROVIDER__MAX) {
+if (!names && provider == STATS_PROVIDER__MAX) {
 return filter;
 }
 
-/* "info stats" can only query either one or all the providers.  */
+/*
+ * "info stats" can only query either one or all the providers.  Querying
+ * by name, but not by provider, requires the creation of one filter per
+ * provider.
+ */
+for (provider_idx = 0; provider_idx < STATS_PROVIDER__MAX; provider_idx++) 
{
+if (provider == STATS_PROVIDER__MAX || provider == provider_idx) {
+StatsRequest *request = g_new0(StatsRequest, 1);
+request->provider = provider_idx;
+if (names && !g_str_equal(names, "*")) {
+request->has_names = true;
+request->names = strList_from_comma_list(names);
+}
+QAPI_LIST_PREPEND(request_list, request);
+}
+}
+
 filter->has_providers = true;
-filter->providers = g_new0(StatsRequestList, 1);
-filter->providers->value = g_new0(StatsRequest, 1);
-filter->providers->value->provider = provider;
+filter->providers = request_list;
 return filter;
 }
 
@@ -2399,6 +2415,7 @@ void hmp_info_stats(Monitor *mon, const QDict *qdict)
 {
 const char *target_str = qdict_get_str(qdict, "target");
 const char *provider_str = qdict_get_try_str(qdict, "provider");
+const char *names = qdict_get_try_str(qdict, "names");
 
 StatsProvider provider = STATS_PROVIDER__MAX;
 StatsTarget target;
@@ -2429,11 +2446,11 @@ void hmp_info_stats(Monitor *mon, const QDict *qdict)
 
 switch (target) {
 case STATS_TARGET_VM:
-filter = stats_filter(target, -1, provider);
+filter = stats_filter(target, names, -1, provider);
 break;
 case STATS_TARGET_VCPU: {}
 int cpu_index = monitor_get_cpu_index(mon);
-filter = stats_filter(target, cpu_index, provider);
+filter = stats_filter(target, names, cpu_index, provider);
 break;
 default:
 abort();
-- 
2.36.1





[PULL 02/21] kvm: Support for querying fd-based stats

2022-06-16 Thread Paolo Bonzini
From: Mark Kanda 

Add support for querying fd-based KVM stats - as introduced by Linux kernel
commit:

cb082bfab59a ("KVM: stats: Add fd-based API to read binary stats data")

This allows the user to analyze the behavior of the VM without access
to debugfs.

Signed-off-by: Mark Kanda 
Signed-off-by: Paolo Bonzini 
---
 accel/kvm/kvm-all.c | 392 
 qapi/stats.json |   2 +-
 2 files changed, 393 insertions(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index a4c4863f53..7cc9e33bab 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -47,6 +47,7 @@
 #include "kvm-cpus.h"
 
 #include "hw/boards.h"
+#include "monitor/stats.h"
 
 /* This check must be after config-host.h is included */
 #ifdef CONFIG_EVENTFD
@@ -2310,6 +2311,9 @@ bool kvm_dirty_ring_enabled(void)
 return kvm_state->kvm_dirty_ring_size ? true : false;
 }
 
+static void query_stats_cb(StatsResultList **result, StatsTarget target, Error 
**errp);
+static void query_stats_schemas_cb(StatsSchemaList **result, Error **errp);
+
 static int kvm_init(MachineState *ms)
 {
 MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -2638,6 +2642,10 @@ static int kvm_init(MachineState *ms)
 }
 }
 
+if (kvm_check_extension(kvm_state, KVM_CAP_BINARY_STATS_FD)) {
+add_stats_callbacks(query_stats_cb, query_stats_schemas_cb);
+}
+
 return 0;
 
 err:
@@ -3697,3 +3705,387 @@ static void kvm_type_init(void)
 }
 
 type_init(kvm_type_init);
+
+typedef struct StatsArgs {
+union StatsResultsType {
+StatsResultList **stats;
+StatsSchemaList **schema;
+} result;
+Error **errp;
+} StatsArgs;
+
+static StatsList *add_kvmstat_entry(struct kvm_stats_desc *pdesc,
+uint64_t *stats_data,
+StatsList *stats_list,
+Error **errp)
+{
+
+Stats *stats;
+uint64List *val_list = NULL;
+
+/* Only add stats that we understand.  */
+switch (pdesc->flags & KVM_STATS_TYPE_MASK) {
+case KVM_STATS_TYPE_CUMULATIVE:
+case KVM_STATS_TYPE_INSTANT:
+case KVM_STATS_TYPE_PEAK:
+case KVM_STATS_TYPE_LINEAR_HIST:
+case KVM_STATS_TYPE_LOG_HIST:
+break;
+default:
+return stats_list;
+}
+
+switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+case KVM_STATS_UNIT_NONE:
+case KVM_STATS_UNIT_BYTES:
+case KVM_STATS_UNIT_CYCLES:
+case KVM_STATS_UNIT_SECONDS:
+break;
+default:
+return stats_list;
+}
+
+switch (pdesc->flags & KVM_STATS_BASE_MASK) {
+case KVM_STATS_BASE_POW10:
+case KVM_STATS_BASE_POW2:
+break;
+default:
+return stats_list;
+}
+
+/* Alloc and populate data list */
+stats = g_new0(Stats, 1);
+stats->name = g_strdup(pdesc->name);
+stats->value = g_new0(StatsValue, 1);;
+
+if (pdesc->size == 1) {
+stats->value->u.scalar = *stats_data;
+stats->value->type = QTYPE_QNUM;
+} else {
+int i;
+for (i = 0; i < pdesc->size; i++) {
+QAPI_LIST_PREPEND(val_list, stats_data[i]);
+}
+stats->value->u.list = val_list;
+stats->value->type = QTYPE_QLIST;
+}
+
+QAPI_LIST_PREPEND(stats_list, stats);
+return stats_list;
+}
+
+static StatsSchemaValueList *add_kvmschema_entry(struct kvm_stats_desc *pdesc,
+ StatsSchemaValueList *list,
+ Error **errp)
+{
+StatsSchemaValueList *schema_entry = g_new0(StatsSchemaValueList, 1);
+schema_entry->value = g_new0(StatsSchemaValue, 1);
+
+switch (pdesc->flags & KVM_STATS_TYPE_MASK) {
+case KVM_STATS_TYPE_CUMULATIVE:
+schema_entry->value->type = STATS_TYPE_CUMULATIVE;
+break;
+case KVM_STATS_TYPE_INSTANT:
+schema_entry->value->type = STATS_TYPE_INSTANT;
+break;
+case KVM_STATS_TYPE_PEAK:
+schema_entry->value->type = STATS_TYPE_PEAK;
+break;
+case KVM_STATS_TYPE_LINEAR_HIST:
+schema_entry->value->type = STATS_TYPE_LINEAR_HISTOGRAM;
+schema_entry->value->bucket_size = pdesc->bucket_size;
+schema_entry->value->has_bucket_size = true;
+break;
+case KVM_STATS_TYPE_LOG_HIST:
+schema_entry->value->type = STATS_TYPE_LOG2_HISTOGRAM;
+break;
+default:
+goto exit;
+}
+
+switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+case KVM_STATS_UNIT_NONE:
+break;
+case KVM_STATS_UNIT_BYTES:
+schema_entry->value->has_unit = true;
+schema_entry->value->unit = STATS_UNIT_BYTES;
+break;
+case KVM_STATS_UNIT_CYCLES:
+schema_entry->value->has_unit = true;
+schema_entry->value->unit = STATS_UNIT_CYCLES;
+break;
+case KVM_STATS_UNIT_SECONDS:
+schema_entry->value->has_unit = true;
+schema_entry->value->unit = STATS_UNIT_

[PULL 14/21] virtio-mmio: cleanup reset

2022-06-16 Thread Paolo Bonzini
Make virtio_mmio_soft_reset reset the virtio device, which is performed by
both the "soft" and the "hard" reset; and then call virtio_mmio_soft_reset
from virtio_mmio_reset to emphasize that the latter is a superset of the
former.

Signed-off-by: Paolo Bonzini 
---
 hw/virtio/virtio-mmio.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 6d81a26473..d240efef97 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -72,12 +72,12 @@ static void virtio_mmio_soft_reset(VirtIOMMIOProxy *proxy)
 {
 int i;
 
-if (proxy->legacy) {
-return;
-}
+virtio_bus_reset(&proxy->bus);
 
-for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
-proxy->vqs[i].enabled = 0;
+if (!proxy->legacy) {
+for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+proxy->vqs[i].enabled = 0;
+}
 }
 }
 
@@ -376,7 +376,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 return;
 }
 if (value == 0) {
-virtio_bus_reset(&vdev->bus);
+virtio_mmio_soft_reset(proxy);
 } else {
 virtio_queue_set_addr(vdev, vdev->queue_sel,
   value << proxy->guest_page_shift);
@@ -432,7 +432,6 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 
 if (vdev->status == 0) {
-virtio_reset(vdev);
 virtio_mmio_soft_reset(proxy);
 }
 break;
@@ -627,7 +626,8 @@ static void virtio_mmio_reset(DeviceState *d)
 VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
 int i;
 
-virtio_bus_reset(&proxy->bus);
+virtio_mmio_soft_reset(proxy);
+
 proxy->host_features_sel = 0;
 proxy->guest_features_sel = 0;
 proxy->guest_page_shift = 0;
@@ -636,7 +636,6 @@ static void virtio_mmio_reset(DeviceState *d)
 proxy->guest_features[0] = proxy->guest_features[1] = 0;
 
 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
-proxy->vqs[i].enabled = 0;
 proxy->vqs[i].num = 0;
 proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
 proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
-- 
2.36.1





[PULL 10/21] block: add more commands to preconfig mode

2022-06-16 Thread Paolo Bonzini
Of the block device commands, those that are available outside system
emulators do not require a fully constructed machine by definition.
Allow running them before machine initialization has concluded.

Of the ones that are available inside system emulation, allow querying
the PR managers, and setting up accounting and throttling.

Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Paolo Bonzini 
---
 hmp-commands.hx|  14 +
 qapi/block-core.json   | 117 +++--
 qapi/block-export.json |  21 +---
 qapi/block.json|   6 ++-
 4 files changed, 110 insertions(+), 48 deletions(-)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 564f1de364..c9d465735a 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -78,6 +78,7 @@ ERST
 .help   = "resize a block image",
 .cmd= hmp_block_resize,
 .coroutine  = true,
+.flags  = "p",
 },
 
 SRST
@@ -94,6 +95,7 @@ ERST
 .params = "device [speed [base]]",
 .help   = "copy data from a backing file into a block device",
 .cmd= hmp_block_stream,
+.flags  = "p",
 },
 
 SRST
@@ -107,6 +109,7 @@ ERST
 .params = "device speed",
 .help   = "set maximum speed for a background block operation",
 .cmd= hmp_block_job_set_speed,
+.flags  = "p",
 },
 
 SRST
@@ -122,6 +125,7 @@ ERST
   "\n\t\t\t if you want to abort the operation immediately"
   "\n\t\t\t instead of keep running until data is in 
sync)",
 .cmd= hmp_block_job_cancel,
+.flags  = "p",
 },
 
 SRST
@@ -135,6 +139,7 @@ ERST
 .params = "device",
 .help   = "stop an active background block operation",
 .cmd= hmp_block_job_complete,
+.flags  = "p",
 },
 
 SRST
@@ -149,6 +154,7 @@ ERST
 .params = "device",
 .help   = "pause an active background block operation",
 .cmd= hmp_block_job_pause,
+.flags  = "p",
 },
 
 SRST
@@ -162,6 +168,7 @@ ERST
 .params = "device",
 .help   = "resume a paused background block operation",
 .cmd= hmp_block_job_resume,
+.flags  = "p",
 },
 
 SRST
@@ -1406,6 +1413,7 @@ ERST
 .params = "nbd_server_start [-a] [-w] host:port",
 .help   = "serve block devices on the given host and port",
 .cmd= hmp_nbd_server_start,
+.flags  = "p",
 },
 SRST
 ``nbd_server_start`` *host*:*port*
@@ -1421,6 +1429,7 @@ ERST
 .params = "nbd_server_add [-w] device [name]",
 .help   = "export a block device via NBD",
 .cmd= hmp_nbd_server_add,
+.flags  = "p",
 },
 SRST
 ``nbd_server_add`` *device* [ *name* ]
@@ -1436,6 +1445,7 @@ ERST
 .params = "nbd_server_remove [-f] name",
 .help   = "remove an export previously exposed via NBD",
 .cmd= hmp_nbd_server_remove,
+.flags  = "p",
 },
 SRST
 ``nbd_server_remove [-f]`` *name*
@@ -1452,6 +1462,7 @@ ERST
 .params = "nbd_server_stop",
 .help   = "stop serving block devices using the NBD protocol",
 .cmd= hmp_nbd_server_stop,
+.flags  = "p",
 },
 SRST
 ``nbd_server_stop``
@@ -1481,6 +1492,7 @@ ERST
 .params = "getfd name",
 .help   = "receive a file descriptor via SCM rights and assign it 
a name",
 .cmd= hmp_getfd,
+.flags  = "p",
 },
 
 SRST
@@ -1496,6 +1508,7 @@ ERST
 .params = "closefd name",
 .help   = "close a file descriptor previously passed via SCM 
rights",
 .cmd= hmp_closefd,
+.flags  = "p",
 },
 
 SRST
@@ -1511,6 +1524,7 @@ ERST
 .params = "device bps bps_rd bps_wr iops iops_rd iops_wr",
 .help   = "change I/O throttle limits for a block drive",
 .cmd= hmp_block_set_io_throttle,
+.flags  = "p",
 },
 
 SRST
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f0383c7925..457df16638 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -737,7 +737,8 @@
 #}
 #
 ##
-{ 'command': 'query-block', 'returns': ['BlockInfo'] }
+{ 'command': 'query-block', 'returns': ['BlockInfo'],
+  'allow-preconfig': true }
 
 ##
 # @BlockDeviceTimedStats:
@@ -1113,7 +1114,8 @@
 ##
 { 'command': 'query-blockstats',
   'data': { '*query-nodes': 'bool' },
-  'returns': ['BlockStats'] }
+  'returns': ['BlockStats'],
+  'allow-preconfig': true }
 
 ##
 # @BlockdevOnError:
@@ -1262,7 +1264,8 @@
 #
 # Since: 1.1
 ##
-{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
+{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'],
+  'allow-preconfig': true }
 
 ##
 # @block_resize:
@@ -1293,7 +1296,8 @@
   'data': { '*device': 'str',
 

[PULL 08/21] qmp: add filtering of statistics by name

2022-06-16 Thread Paolo Bonzini
Allow retrieving only a subset of statistics.  This can be useful
for example in order to plot a subset of the statistics many times
a second: KVM publishes ~40 statistics for each vCPU on x86; retrieving
and serializing all of them would be useless.

Another use will be in HMP in the following patch; implementing the
filter in the backend is easy enough that it was deemed okay to make
this a public interface.

Example:

{ "execute": "query-stats",
  "arguments": {
"target": "vcpu",
"vcpus": [ "/machine/unattached/device[2]",
   "/machine/unattached/device[4]" ],
"providers": [
  { "provider": "kvm",
"names": [ "l1d_flush", "exits" ] } } }

{ "return": {
"vcpus": [
  { "path": "/machine/unattached/device[2]"
"providers": [
  { "provider": "kvm",
"stats": [ { "name": "l1d_flush", "value": 41213 },
   { "name": "exits", "value": 74291 } ] } ] },
  { "path": "/machine/unattached/device[4]"
"providers": [
  { "provider": "kvm",
"stats": [ { "name": "l1d_flush", "value": 16132 },
   { "name": "exits", "value": 57922 } ] } ] } ] } }

Extracted from a patch by Mark Kanda.

Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Paolo Bonzini 
---
 accel/kvm/kvm-all.c | 17 +++--
 include/monitor/stats.h |  2 +-
 monitor/qmp-cmds.c  |  7 ++-
 qapi/stats.json |  6 +-
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2e819beaeb..ba3210b1c1 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2312,7 +2312,7 @@ bool kvm_dirty_ring_enabled(void)
 }
 
 static void query_stats_cb(StatsResultList **result, StatsTarget target,
-   strList *targets, Error **errp);
+   strList *names, strList *targets, Error **errp);
 static void query_stats_schemas_cb(StatsSchemaList **result, Error **errp);
 
 static int kvm_init(MachineState *ms)
@@ -3713,6 +3713,7 @@ typedef struct StatsArgs {
 StatsResultList **stats;
 StatsSchemaList **schema;
 } result;
+strList *names;
 Error **errp;
 } StatsArgs;
 
@@ -3916,7 +3917,7 @@ static StatsDescriptors 
*find_stats_descriptors(StatsTarget target, int stats_fd
 }
 
 static void query_stats(StatsResultList **result, StatsTarget target,
-int stats_fd, Error **errp)
+strList *names, int stats_fd, Error **errp)
 {
 struct kvm_stats_desc *kvm_stats_desc;
 struct kvm_stats_header *kvm_stats_header;
@@ -3958,6 +3959,9 @@ static void query_stats(StatsResultList **result, 
StatsTarget target,
 
 /* Add entry to the list */
 stats = (void *)stats_data + pdesc->offset;
+if (!apply_str_list_filter(pdesc->name, names)) {
+continue;
+}
 stats_list = add_kvmstat_entry(pdesc, stats, stats_list, errp);
 }
 
@@ -4019,8 +4023,8 @@ static void query_stats_vcpu(CPUState *cpu, 
run_on_cpu_data data)
 error_propagate(kvm_stats_args->errp, local_err);
 return;
 }
-query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, stats_fd,
-kvm_stats_args->errp);
+query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU,
+kvm_stats_args->names, stats_fd, kvm_stats_args->errp);
 close(stats_fd);
 }
 
@@ -4041,7 +4045,7 @@ static void query_stats_schema_vcpu(CPUState *cpu, 
run_on_cpu_data data)
 }
 
 static void query_stats_cb(StatsResultList **result, StatsTarget target,
-   strList *targets, Error **errp)
+   strList *names, strList *targets, Error **errp)
 {
 KVMState *s = kvm_state;
 CPUState *cpu;
@@ -4055,7 +4059,7 @@ static void query_stats_cb(StatsResultList **result, 
StatsTarget target,
 error_setg_errno(errp, errno, "KVM stats: ioctl failed");
 return;
 }
-query_stats(result, target, stats_fd, errp);
+query_stats(result, target, names, stats_fd, errp);
 close(stats_fd);
 break;
 }
@@ -4063,6 +4067,7 @@ static void query_stats_cb(StatsResultList **result, 
StatsTarget target,
 {
 StatsArgs stats_args;
 stats_args.result.stats = result;
+stats_args.names = names;
 stats_args.errp = errp;
 CPU_FOREACH(cpu) {
 if (!apply_str_list_filter(cpu->parent_obj.canonical_path, 
targets)) {
diff --git a/include/monitor/stats.h b/include/monitor/stats.h
index 80a523dd29..fcf0983154 100644
--- a/include/monitor/stats.h
+++ b/include/monitor/stats.h
@@ -11,7 +11,7 @@
 #include "qapi/qapi-types-stats.h"
 
 typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target,
-  strList *targets, Error **errp);
+  strList *names, strList *targets, Error **errp);
 typedef void SchemaRetrieveFunc(Stats

[PULL 11/21] s390x: simplify virtio_ccw_reset_virtio

2022-06-16 Thread Paolo Bonzini
Call virtio_bus_reset instead of virtio_reset, so that the function
need not receive the VirtIODevice.

Reviewed-by: Cornelia Huck 
Signed-off-by: Paolo Bonzini 
---
 hw/s390x/virtio-ccw.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 15b458527e..066a387802 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -249,12 +249,12 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock 
*info,
 return 0;
 }
 
-static void virtio_ccw_reset_virtio(VirtioCcwDevice *dev, VirtIODevice *vdev)
+static void virtio_ccw_reset_virtio(VirtioCcwDevice *dev)
 {
 CcwDevice *ccw_dev = CCW_DEVICE(dev);
 
 virtio_ccw_stop_ioeventfd(dev);
-virtio_reset(vdev);
+virtio_bus_reset(&dev->bus);
 if (dev->indicators) {
 release_indicator(&dev->routes.adapter, dev->indicators);
 dev->indicators = NULL;
@@ -359,7 +359,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
 ret = virtio_ccw_handle_set_vq(sch, ccw, check_len, dev->revision < 1);
 break;
 case CCW_CMD_VDEV_RESET:
-virtio_ccw_reset_virtio(dev, vdev);
+virtio_ccw_reset_virtio(dev);
 ret = 0;
 break;
 case CCW_CMD_READ_FEAT:
@@ -536,7 +536,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
 }
 if (virtio_set_status(vdev, status) == 0) {
 if (vdev->status == 0) {
-virtio_ccw_reset_virtio(dev, vdev);
+virtio_ccw_reset_virtio(dev);
 }
 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
 virtio_ccw_start_ioeventfd(dev);
@@ -921,10 +921,9 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t 
vector)
 static void virtio_ccw_reset(DeviceState *d)
 {
 VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
-VirtIODevice *vdev = virtio_bus_get_device(&dev->bus);
 VirtIOCCWDeviceClass *vdc = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
 
-virtio_ccw_reset_virtio(dev, vdev);
+virtio_ccw_reset_virtio(dev);
 if (vdc->parent_reset) {
 vdc->parent_reset(d);
 }
-- 
2.36.1





[PULL 17/21] tests/vm: allow running tests in an unconfigured source tree

2022-06-16 Thread Paolo Bonzini
tests/vm/Makefile.include used to assume that it could run in an unconfigured
source tree, and Cirrus CI relies on that.  It was however broken by commit
f4c66f1705 ("tests: use tests/venv to run basevm.py-based scripts", 2022-06-06),
which co-opted the virtual environment being used by avocado tests
to also run the basevm.py tests.

For now, reintroduce the usage of qemu.qmp from the source directory, but
without the sys.path() hacks.  The CI configuration can be changed to
install the package via pip when qemu.qmp is removed from the source tree.

Cc: John Snow 
Signed-off-by: Paolo Bonzini 
---
 tests/vm/Makefile.include | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index 588bc999cc..5f5b1fbfe6 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -1,8 +1,17 @@
 # Makefile for VM tests
 
-.PHONY: vm-build-all vm-clean-all
+# Hack to allow running in an unconfigured build tree
+ifeq ($(wildcard $(SRC_PATH)/config-host.mak),)
+VM_PYTHON = PYTHONPATH=$(SRC_PATH)/python /usr/bin/env python3
+VM_VENV =
+HOST_ARCH := $(shell uname -m)
+else
+VM_PYTHON = $(TESTS_PYTHON)
+VM_VENV = check-venv
+HOST_ARCH = $(ARCH)
+endif
 
-HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m))
+.PHONY: vm-build-all vm-clean-all
 
 EFI_AARCH64 = $(wildcard $(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd)
 
@@ -85,10 +94,10 @@ vm-clean-all:
 $(IMAGES_DIR)/%.img:   $(SRC_PATH)/tests/vm/% \
$(SRC_PATH)/tests/vm/basevm.py \
$(SRC_PATH)/tests/vm/Makefile.include \
-   check-venv
+   $(VM_VENV)
@mkdir -p $(IMAGES_DIR)
$(call quiet-command, \
-   $(TESTS_PYTHON) $< \
+   $(VM_PYTHON) $< \
$(if $(V)$(DEBUG), --debug) \
$(if $(GENISOIMAGE),--genisoimage $(GENISOIMAGE)) \
$(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \
@@ -100,11 +109,10 @@ $(IMAGES_DIR)/%.img:  $(SRC_PATH)/tests/vm/% \
--build-image $@, \
"  VM-IMAGE $*")
 
-
 # Build in VM $(IMAGE)
-vm-build-%: $(IMAGES_DIR)/%.img check-venv
+vm-build-%: $(IMAGES_DIR)/%.img $(VM_VENV)
$(call quiet-command, \
-   $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \
+   $(VM_PYTHON) $(SRC_PATH)/tests/vm/$* \
$(if $(V)$(DEBUG), --debug) \
$(if $(DEBUG), --interactive) \
$(if $(J),--jobs $(J)) \
@@ -128,9 +136,9 @@ vm-boot-serial-%: $(IMAGES_DIR)/%.img
-device virtio-net-pci,netdev=vnet \
|| true
 
-vm-boot-ssh-%: $(IMAGES_DIR)/%.img check-venv
+vm-boot-ssh-%: $(IMAGES_DIR)/%.img $(VM_VENV)
$(call quiet-command, \
-   $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \
+   $(VM_PYTHON) $(SRC_PATH)/tests/vm/$* \
$(if $(J),--jobs $(J)) \
$(if $(V)$(DEBUG), --debug) \
$(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \
-- 
2.36.1





[PULL 12/21] virtio-mmio: stop ioeventfd on legacy reset

2022-06-16 Thread Paolo Bonzini
If the queue PFN is set to zero on a virtio-mmio device, the device is reset.
In that case however the virtio_bus_stop_ioeventfd function was not
called; add it so that the behavior is similar to when status is set to 0.

Reviewed-by: Cornelia Huck 
Signed-off-by: Paolo Bonzini 
---
 hw/virtio/virtio-mmio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 688eccda94..41a35d31c8 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -376,6 +376,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 return;
 }
 if (value == 0) {
+virtio_mmio_stop_ioeventfd(proxy);
 virtio_reset(vdev);
 } else {
 virtio_queue_set_addr(vdev, vdev->queue_sel,
-- 
2.36.1





Corrupted display changing screen colour depth in qemu-system-ppc/MacOS

2022-06-16 Thread Mark Cave-Ayland

Hi all,

Howard pointed me off-list to a corrupted screen display issue experienced when 
changing the screen colour depth in MacOS under qemu-system-ppc. I was able to 
reproduce it here, and noticed from the output that the issue was likely due to the 
host display not updating its depth accordingly.


After it was confirmed to me that this was working in QEMU 6.2, I was able to 
eventually bisect the problem down to this commit:



cb8962c146b2633a4b04562281de9b2703bba849 is the first bad commit
commit cb8962c146b2633a4b04562281de9b2703bba849
Author: Marc-André Lureau 
Date:   Tue Feb 15 00:13:37 2022 +0400

ui: do not create a surface when resizing a GL scanout

qemu_console_resize() will create a blank surface and replace the
current scanout with it if called while the current scanout is
GL (texture or dmabuf).

This is not only very costly, but also can produce glitches on the
display/listener side.

Instead, compare the current console size with the fitting console
functions, which also works when the scanout is GL.

Note: there might be still an unnecessary surface creation on calling
qemu_console_resize() when the size is actually changing, but display
backends currently rely on DisplaySurface details during
dpy_gfx_switch() to handle various resize aspects. We would need more
refactoring to handle resize without DisplaySurface, this is left for a
future improvement.

Signed-off-by: Marc-André Lureau 
Message-Id: <20220214201337.1814787-4-marcandre.lur...@redhat.com>
Signed-off-by: Gerd Hoffmann 

 ui/console.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)


Some more background: the screen in qemu-system-ppc's MacOS is controlled via a 
custom driver written by Ben which uses the Bochs VBE registers to change the screen 
width/height/depth. The code used to do this can be found at 
https://gitlab.com/qemu-project/QemuMacDrivers/-/blob/master/QemuVGADriver/src/QemuVga.c#L354.


Looking at the changes in cb8962c146 my guess would be that either the updated check 
in qemu_console_resize() should also check to see if the surface depth is unchanged 
before exiting early, or that there is an extra update required in the VGA device 
when changing just the screen colour depth by itself.



ATB,

Mark.



[PULL 16/21] configure: cleanup -fno-pie detection

2022-06-16 Thread Paolo Bonzini
Place it only inside the 'if test "$pie" = "no"' conditional.  Since
commit 43924d1e53 ("pc-bios/optionrom: detect -fno-pie", 2022-05-12),
the PIE options are detected independently by pc-bios/optionrom/Makefile,
and the CFLAGS_NOPIE/LDFLAGS_NOPIE variables are not used anymore.

Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 configure | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/configure b/configure
index d41c7eddff..9fba134746 100755
--- a/configure
+++ b/configure
@@ -1346,13 +1346,6 @@ static THREAD int tls_var;
 int main(void) { return tls_var; }
 EOF
 
-# Check we support -fno-pie and -no-pie first; we will need the former for
-# building ROMs, and both for everything if --disable-pie is passed.
-if compile_prog "-Werror -fno-pie" "-no-pie"; then
-  CFLAGS_NOPIE="-fno-pie"
-  LDFLAGS_NOPIE="-no-pie"
-fi
-
 if test "$static" = "yes"; then
   if test "$pie" != "no" && compile_prog "-Werror -fPIE -DPIE" "-static-pie"; 
then
 CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS"
@@ -1365,8 +1358,10 @@ if test "$static" = "yes"; then
 pie="no"
   fi
 elif test "$pie" = "no"; then
-  CONFIGURE_CFLAGS="$CFLAGS_NOPIE $CONFIGURE_CFLAGS"
-  CONFIGURE_LDFLAGS="$LDFLAGS_NOPIE $CONFIGURE_LDFLAGS"
+  if compile_prog "-Werror -fno-pie" "-no-pie"; then
+CONFIGURE_CFLAGS="-fno-pie $CONFIGURE_CFLAGS"
+CONFIGURE_LDFLAGS="-no-pie $CONFIGURE_LDFLAGS"
+  fi
 elif compile_prog "-Werror -fPIE -DPIE" "-pie"; then
   CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS"
   CONFIGURE_LDFLAGS="-pie $CONFIGURE_LDFLAGS"
-- 
2.36.1





[PULL 13/21] virtio: stop ioeventfd on reset

2022-06-16 Thread Paolo Bonzini
All calls to virtio_bus_reset are preceded by virtio_bus_stop_ioeventfd,
move the call in virtio_bus_reset: that makes sense and clarifies
that the vdc->reset function is called with ioeventfd already stopped.

Reviewed-by: Cornelia Huck 
Signed-off-by: Paolo Bonzini 
---
 hw/s390x/virtio-ccw.c   | 1 -
 hw/virtio/virtio-bus.c  | 1 +
 hw/virtio/virtio-mmio.c | 4 +---
 hw/virtio/virtio-pci.c  | 1 -
 4 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 066a387802..e33e5207ab 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -253,7 +253,6 @@ static void virtio_ccw_reset_virtio(VirtioCcwDevice *dev)
 {
 CcwDevice *ccw_dev = CCW_DEVICE(dev);
 
-virtio_ccw_stop_ioeventfd(dev);
 virtio_bus_reset(&dev->bus);
 if (dev->indicators) {
 release_indicator(&dev->routes.adapter, dev->indicators);
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index d7ec023adf..896feb37a1 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -104,6 +104,7 @@ void virtio_bus_reset(VirtioBusState *bus)
 VirtIODevice *vdev = virtio_bus_get_device(bus);
 
 DPRINTF("%s: reset device.\n", BUS(bus)->name);
+virtio_bus_stop_ioeventfd(bus);
 if (vdev != NULL) {
 virtio_reset(vdev);
 }
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 41a35d31c8..6d81a26473 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -376,8 +376,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 return;
 }
 if (value == 0) {
-virtio_mmio_stop_ioeventfd(proxy);
-virtio_reset(vdev);
+virtio_bus_reset(&vdev->bus);
 } else {
 virtio_queue_set_addr(vdev, vdev->queue_sel,
   value << proxy->guest_page_shift);
@@ -628,7 +627,6 @@ static void virtio_mmio_reset(DeviceState *d)
 VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
 int i;
 
-virtio_mmio_stop_ioeventfd(proxy);
 virtio_bus_reset(&proxy->bus);
 proxy->host_features_sel = 0;
 proxy->guest_features_sel = 0;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 0566ad7d00..45327f0b31 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1945,7 +1945,6 @@ static void virtio_pci_reset(DeviceState *qdev)
 PCIDevice *dev = PCI_DEVICE(qdev);
 int i;
 
-virtio_pci_stop_ioeventfd(proxy);
 virtio_bus_reset(bus);
 msix_unuse_all_vectors(&proxy->pci_dev);
 
-- 
2.36.1





[PULL 18/21] build: fix check for -fsanitize-coverage-allowlist

2022-06-16 Thread Paolo Bonzini
From: Alexander Bulekov 

The existing check has two problems:
1. Meson uses a private directory for the get_supported_arguments check.
./instrumentation-filter does not exist in that private directory (it is
copied into the root of the build-directory).

2. fsanitize-coverage-allowlist is unused when coverage instrumentation
is not configured. No instrumentation flags are passed for the
get_supported_arguments check

Thus the check always fails. To work around this, change the check to an
"if cc.compiles" check and provide /dev/null, instead of the real
filter.

Meson log:
Working directory:  build/meson-private/tmpl6wld2d9
Command line:  clang-13 -m64 -mcx16
build/meson-private/tmpl6wld2d9/output.obj -c -O3 -D_FILE_OFFSET_BITS=64
-O0 -Werror=implicit-function-declaration -Werror=unknown-warning-option
-Werror=unused-command-line-argument
-Werror=ignored-optimization-argument
-fsanitize-coverage-allowlist=instrumentation-filter

Error:
error: argument unused during compilation:
'-fsanitize-coverage-allowlist=instrumentation-filter'

Signed-off-by: Alexander Bulekov 
Message-Id: <20220614155415.4023833-1-alx...@bu.edu>
Signed-off-by: Paolo Bonzini 
---
 meson.build | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index 21cd949082..fe5d6632fb 100644
--- a/meson.build
+++ b/meson.build
@@ -209,9 +209,13 @@ if get_option('fuzzing')
   configure_file(output: 'instrumentation-filter',
  input: 'scripts/oss-fuzz/instrumentation-filter-template',
  copy: true)
-  add_global_arguments(
-  
cc.get_supported_arguments('-fsanitize-coverage-allowlist=instrumentation-filter'),
-  native: false, language: ['c', 'cpp', 'objc'])
+
+  if cc.compiles('int main () { return 0; }',
+  name: '-fsanitize-coverage-allowlist=/dev/null',
+ args: ['-fsanitize-coverage-allowlist=/dev/null'] )
+
add_global_arguments('-fsanitize-coverage-allowlist=instrumentation-filter',
+ native: false, language: ['c', 'cpp', 'objc'])
+  endif
 
   if get_option('fuzzing_engine') == ''
 # Add CFLAGS to tell clang to add fuzzer-related instrumentation to all the
-- 
2.36.1





[PULL 20/21] meson: put cross compiler info in a separate section

2022-06-16 Thread Paolo Bonzini
While at it, remove a dead assignment and simply inline the value of the
"target" variable, which is used just once.

Signed-off-by: Paolo Bonzini 
---
 meson.build | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/meson.build b/meson.build
index fe5d6632fb..0458b69cdf 100644
--- a/meson.build
+++ b/meson.build
@@ -3744,21 +3744,24 @@ endif
 summary_info += {'strip binaries':get_option('strip')}
 summary_info += {'sparse':sparse}
 summary_info += {'mingw32 support':   targetos == 'windows'}
+summary(summary_info, bool_yn: true, section: 'Compilation')
 
 # snarf the cross-compilation information for tests
+summary_info = {}
+have_cross = false
 foreach target: target_dirs
   tcg_mak = meson.current_build_dir() / 'tests/tcg' / 'config-' + target + 
'.mak'
   if fs.exists(tcg_mak)
 config_cross_tcg = keyval.load(tcg_mak)
-target = config_cross_tcg['TARGET_NAME']
-compiler = ''
 if 'CC' in config_cross_tcg
-  summary_info += {target + ' tests': config_cross_tcg['CC']}
+  summary_info += {config_cross_tcg['TARGET_NAME']: config_cross_tcg['CC']}
+  have_cross = true
 endif
-   endif
+  endif
 endforeach
-
-summary(summary_info, bool_yn: true, section: 'Compilation')
+if have_cross
+  summary(summary_info, bool_yn: true, section: 'Cross compilers')
+endif
 
 # Targets and accelerators
 summary_info = {}
-- 
2.36.1





[PULL 21/21] build: include pc-bios/ part in the ROMS variable

2022-06-16 Thread Paolo Bonzini
Include the full path in TARGET_DIR, so that messages from sub-Makefiles
are clearer.  Also, prepare for possibly building firmware outside
pc-bios/ from the Makefile.

Signed-off-by: Paolo Bonzini 
---
 Makefile  | 12 +---
 configure |  6 +++---
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 3c0d89057e..ec4445db9a 100644
--- a/Makefile
+++ b/Makefile
@@ -186,16 +186,14 @@ include $(SRC_PATH)/tests/Makefile.include
 
 all: recurse-all
 
-ROM_DIRS = $(addprefix pc-bios/, $(ROMS))
-ROM_DIRS_RULES=$(foreach t, all clean, $(addsuffix /$(t), $(ROM_DIRS)))
-# Only keep -O and -g cflags
-.PHONY: $(ROM_DIRS_RULES)
-$(ROM_DIRS_RULES):
+ROMS_RULES=$(foreach t, all clean, $(addsuffix /$(t), $(ROMS)))
+.PHONY: $(ROMS_RULES)
+$(ROMS_RULES):
$(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C $(dir $@) V="$(V)" 
TARGET_DIR="$(dir $@)" $(notdir $@),)
 
 .PHONY: recurse-all recurse-clean
-recurse-all: $(addsuffix /all, $(ROM_DIRS))
-recurse-clean: $(addsuffix /clean, $(ROM_DIRS))
+recurse-all: $(addsuffix /all, $(ROMS))
+recurse-clean: $(addsuffix /clean, $(ROMS))
 
 ##
 
diff --git a/configure b/configure
index 9fba134746..a4d61fe504 100755
--- a/configure
+++ b/configure
@@ -2236,7 +2236,7 @@ if test -n "$target_cc" &&
 fi
 done
 if test -n "$ld_i386_emulation"; then
-roms="optionrom"
+roms="pc-bios/optionrom"
 config_mak=pc-bios/optionrom/config.mak
 echo "# Automatically generated by configure - do not modify" > 
$config_mak
 echo "TOPSRC_DIR=$source_path" >> $config_mak
@@ -2247,7 +2247,7 @@ fi
 
 probe_target_compilers ppc ppc64
 if test -n "$target_cc" && test "$softmmu" = yes; then
-roms="$roms vof"
+roms="$roms pc-bios/vof"
 config_mak=pc-bios/vof/config.mak
 echo "# Automatically generated by configure - do not modify" > $config_mak
 echo "SRC_DIR=$source_path/pc-bios/vof" >> $config_mak
@@ -2266,7 +2266,7 @@ if test -n "$target_cc" && test "$softmmu" = yes; then
   echo "WARNING: Your compiler does not support the z900!"
   echo " The s390-ccw bios will only work with guest CPUs >= z10."
 fi
-roms="$roms s390-ccw"
+roms="$roms pc-bios/s390-ccw"
 config_mak=pc-bios/s390-ccw/config-host.mak
 echo "# Automatically generated by configure - do not modify" > $config_mak
 echo "SRC_PATH=$source_path/pc-bios/s390-ccw" >> $config_mak
-- 
2.36.1




[PULL 19/21] q35:Enable TSEG only when G_SMRAME and TSEG_EN both enabled

2022-06-16 Thread Paolo Bonzini
From: Zhenzhong Duan 

According to spec:
"TSEG Enable (T_EN): Enabling of SMRAM memory for Extended SMRAM space
only. When G_SMRAME = 1 and TSEG_EN = 1, the TSEG is enabled to appear
in the appropriate physical address space. Note that once D_LCK is set,
this bit becomes read only."

Changed to match the spec description.

Signed-off-by: Zhenzhong Duan 
Message-Id: <20220615034501.2733802-1-zhenzhong.d...@intel.com>
Signed-off-by: Paolo Bonzini 
---
 hw/pci-host/q35.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index ab5a47aff5..20da121374 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -379,7 +379,8 @@ static void mch_update_smram(MCHPCIState *mch)
 memory_region_set_enabled(&mch->high_smram, false);
 }
 
-if (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) {
+if ((pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) &&
+(pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME)) {
 switch (pd->config[MCH_HOST_BRIDGE_ESMRAMC] &
 MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) {
 case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB:
-- 
2.36.1





Re: [PATCH v6 0/7] cutils: Introduce bundle mechanism

2022-06-16 Thread Paolo Bonzini

On 6/15/22 21:27, Paolo Bonzini wrote:


Thanks for prototyping this, I think this is appealing.  I'll take a 
look later at using meson introspection info to build the preinstall 
layout.


Something like this:

diff --git a/meson.build b/meson.build
index 0458b69cdf..17023d511a 100644
--- a/meson.build
+++ b/meson.build
@@ -3612,6 +3612,21 @@ if host_machine.system() == 'windows'
   alias_target('installer', nsis)
 endif

+#
+# Preinstalled data #
+#
+
+qemu_bundle_sources_stamp = configure_file(
+  output: 'qemu-bundle-sources.stamp',
+  command: ['touch', '@OUTPUT@'])
+custom_target('tree with preinstalled data files',
+  build_by_default: true,
+  output: 'qemu-bundle.stamp',
+  input: qemu_bundle_sources_stamp,
+  env: {'MESON': config_host['MESON']},
+  command: files('symlink-install-tree.py'),
+  capture: true)
+
 #
 # Configuration summary #
 #
diff --git a/scripts/symlink-install-tree.py 
b/scripts/symlink-install-tree.py

new file mode 100644
index 00..cd66dc06cd
--- /dev/null
+++ b/scripts/symlink-install-tree.py
@@ -0,0 +1,34 @@
+#! /usr/bin/env python3
+
+import json
+import os
+import subprocess
+import sys
+
+def destdir_join(d1: str, d2: str) -> str:
+if not d1:
+return d2
+if not os.path.isabs(d2):
+return os.path.join(d1, d2)
+
+# c:\destdir + c:\prefix must produce c:\destdir\prefix
+if len(d2) > 1 and d2[1] == ':':
+return d1 + d2[2:]
+return d1 + d2
+
+meson = os.environ.get("MESON")
+out = subprocess.run([meson, 'introspect', '--installed'],
+ stdout=subprocess.PIPE, check=True).stdout
+for source, dest in json.loads(out).items():
+assert os.path.isabs(source)
+bundle_dest = destdir_join('qemu-bundle', dest)
+path = os.path.dirname(bundle_dest)
+try:
+os.makedirs(path, exist_ok=True)
+except OSError as e:
+print(f'error making directory {path}', file=sys.stderr)
+raise e
+try:
+os.symlink(source, bundle_dest)
+except OSError as e:
+print(f'error making symbolic link {dest}', file=sys.stderr)
+raise e




Re: [PATCH] tests/vm: do not specify -bios option

2022-06-16 Thread Daniel P . Berrangé
On Thu, Jun 16, 2022 at 10:30:25AM +0200, Paolo Bonzini wrote:
> When running from the build tree, the executable is able to find
> the BIOS on its own; when running from the source tree, a firmware
> blob should already be installed and there is no guarantee that
> the one in the source tree works with the QEMU that is being used for
> the installation.

I think there is interaction with

  
--firmwarepath=/usr/share/qemu-firmware:/usr/share/ipxe/qemu:/usr/share/seavgabios:/usr/share/seabios:/usr/share/sgabios

as the firmware path listed there will be searched before the local
build tree.

I agree with your desire to remove the -bios option, but I'm not
sure it is entirely safe unless logic in qemu_find_file is fixed
to ignore the global search path when running from the build
tree.

> Just remove the -bios option, since it is unnecessary and in fact
> there are other x86 VM tests that do not bother specifying it.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  tests/vm/fedora  | 1 -
>  tests/vm/freebsd | 1 -
>  tests/vm/netbsd  | 1 -
>  tests/vm/openbsd | 1 -
>  4 files changed, 4 deletions(-)
> 
> diff --git a/tests/vm/fedora b/tests/vm/fedora
> index 92b78d6e2c..12eca919a0 100755
> --- a/tests/vm/fedora
> +++ b/tests/vm/fedora
> @@ -79,7 +79,6 @@ class FedoraVM(basevm.BaseVM):
>  self.exec_qemu_img("create", "-f", "qcow2", img_tmp, self.size)
>  self.print_step("Booting installer")
>  self.boot(img_tmp, extra_args = [
> -"-bios", "pc-bios/bios-256k.bin",
>  "-machine", "graphics=off",
>  "-device", "VGA",
>  "-cdrom", iso
> diff --git a/tests/vm/freebsd b/tests/vm/freebsd
> index 805db759d6..cd1fabde52 100755
> --- a/tests/vm/freebsd
> +++ b/tests/vm/freebsd
> @@ -95,7 +95,6 @@ class FreeBSDVM(basevm.BaseVM):
>  
>  self.print_step("Booting installer")
>  self.boot(img_tmp, extra_args = [
> -"-bios", "pc-bios/bios-256k.bin",
>  "-machine", "graphics=off",
>  "-device", "VGA",
>  "-cdrom", iso
> diff --git a/tests/vm/netbsd b/tests/vm/netbsd
> index 45aa9a7fda..aa883ec23c 100755
> --- a/tests/vm/netbsd
> +++ b/tests/vm/netbsd
> @@ -86,7 +86,6 @@ class NetBSDVM(basevm.BaseVM):
>  
>  self.print_step("Booting installer")
>  self.boot(img_tmp, extra_args = [
> -"-bios", "pc-bios/bios-256k.bin",
>  "-machine", "graphics=off",
>  "-cdrom", iso
>  ])
> diff --git a/tests/vm/openbsd b/tests/vm/openbsd
> index 13c8254214..6f1b6f5b98 100755
> --- a/tests/vm/openbsd
> +++ b/tests/vm/openbsd
> @@ -82,7 +82,6 @@ class OpenBSDVM(basevm.BaseVM):
>  
>  self.print_step("Booting installer")
>  self.boot(img_tmp, extra_args = [
> -"-bios", "pc-bios/bios-256k.bin",
>  "-machine", "graphics=off",
>  "-device", "VGA",
>  "-cdrom", iso
> -- 
> 2.36.1
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH] block/rbd: support driver-specific reopen

2022-06-16 Thread Raphael Pour

Hello everyone,

what do you think? Tell me if something needs to be clarified or improved.

Raphael


OpenPGP_0xCDB1EBB785C5EB7E.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH] hw/riscv: virt: pass random seed to fdt

2022-06-16 Thread Jason A. Donenfeld
Hi Alistair,

On Thu, Jun 16, 2022 at 12:32:36PM +1000, Alistair Francis wrote:
> Applied to riscv-to-apply.next with the full stop removed

Great, thanks. Just wondering: am I looking in the right repo? I don't
see it here: https://github.com/alistair23/qemu/commits/riscv-to-apply.next

Jason



[PATCH v2 2/2] tests/qtest/i440fx-test.c: Enable full test of i440FX PAM operation

2022-06-16 Thread Lev Kujawski
With the prior patch in this series adding support for RE^WE PAM
semantics, the '#ifndef BROKEN' segments of test_i440fx_pam can now be
enabled.

Additionally:
- Verify that changing attributes does not affect the initial contents
  of the PAM region;
- Verify that the first new mask is written before switching
  attributes;
- Switch back to PAM_RE after PAM_WE to read original contents;
- Tighten logic of the !WE write test because we know what the
  original contents were; and
- Write the last mask before testing for it.

Signed-off-by: Lev Kujawski 
---
(v2) No change.

 tests/qtest/i440fx-test.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tests/qtest/i440fx-test.c b/tests/qtest/i440fx-test.c
index 6d7d4d8d8f..073a16bbed 100644
--- a/tests/qtest/i440fx-test.c
+++ b/tests/qtest/i440fx-test.c
@@ -236,33 +236,34 @@ static void test_i440fx_pam(gconstpointer opaque)
 
 /* Switch to WE for the area */
 pam_set(dev, i, PAM_RE | PAM_WE);
+/* Verify the RAM is still all zeros */
+g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0));
 /* Write out a non-zero mask to the full area */
 write_area(pam_area[i].start, pam_area[i].end, 0x42);
-
-#ifndef BROKEN
-/* QEMU only supports a limited form of PAM */
+/* Verify the area contains the new mask */
+g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x42));
 
 /* Switch to !RE for the area */
 pam_set(dev, i, PAM_WE);
 /* Verify the area is not our mask */
 g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x42));
-#endif
 
-/* Verify the area is our new mask */
+/* Switch to !WE for the area */
+pam_set(dev, i, PAM_RE);
+/* Verify the area is once again our mask */
 g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x42));
 
 /* Write out a new mask */
 write_area(pam_area[i].start, pam_area[i].end, 0x82);
 
-#ifndef BROKEN
-/* QEMU only supports a limited form of PAM */
-
-/* Verify the area is not our mask */
-g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82));
+/* Verify the area is not the new mask */
+g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x42));
 
 /* Switch to RE for the area */
 pam_set(dev, i, PAM_RE | PAM_WE);
-#endif
+/* Write out a new mask again */
+write_area(pam_area[i].start, pam_area[i].end, 0x82);
+
 /* Verify the area is our new mask */
 g_assert(verify_area(pam_area[i].start, pam_area[i].end, 0x82));
 
-- 
2.34.1




[PATCH v2 1/2] hw/pci-host/pam.c: Fully support RE^WE semantics of i440FX PAM

2022-06-16 Thread Lev Kujawski
The Programmable Attribute Registers (PAM) of QEMU's emulated i440FX
chipset now fully support the exclusive Read Enable (RE) and Write
Enable (WE) modes by forwarding reads of the applicable PAM region to
RAM and writes to the bus or vice versa, respectively.

The prior behavior for the RE case was to setup a RAM alias and mark
it read-only, but no attempt was made to forward writes to the bus,
and read-only aliases of RAM do not prevent writes. Now, pam.c creates
a ROMD region (with read-only memory backing) coupled with a memory
operation that forwards writes to the bus.

For the WE case, a RAM alias was created, but with no attempt to
forward reads to the bus. Now, pam.c creates a MMIO region that writes
directly to RAM (bypassing the PAM region) and forwards reads to the
bus.

Additional changes:
- Change the type of pam_update parameter idx to type uint8_t,
  eliminating an assert check.
- Remove the fourth PAM alias, for normal RAM-based reads and writes
  of PAM regions, saving memory and clutter in mtree output.

Tested with SeaBIOS and AMIBIOS.

Signed-off-by: Lev Kujawski 
---
(v2) Write to an AddressSpace mapped over ram_memory instead of using
 a pointer, as it surprisingly may not be backed by RAM on, e.g.,
 NUMA configurations.

 hw/pci-host/pam.c | 136 +++---
 include/hw/pci-host/pam.h |   7 +-
 2 files changed, 118 insertions(+), 25 deletions(-)

diff --git a/hw/pci-host/pam.c b/hw/pci-host/pam.c
index 454dd120db..787428c7d8 100644
--- a/hw/pci-host/pam.c
+++ b/hw/pci-host/pam.c
@@ -28,43 +28,133 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/pci-host/pam.h"
 
+static void
+pam_rmem_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+PAMMemoryRegion * const pam = (PAMMemoryRegion *)opaque;
+
+(void)memory_region_dispatch_write(pam->pci_mr, pam->offset + addr, val,
+   size_memop(size), 
MEMTXATTRS_UNSPECIFIED);
+}
+
+static uint64_t
+pam_wmem_read(void *opaque, hwaddr addr, unsigned int size)
+{
+PAMMemoryRegion * const pam = (PAMMemoryRegion *)opaque;
+uint64_t val = (uint64_t)~0;
+
+(void)memory_region_dispatch_read(pam->pci_mr, pam->offset + addr, &val,
+  size_memop(size), 
MEMTXATTRS_UNSPECIFIED);
+
+return val;
+}
+
+static void
+pam_wmem_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+PAMMemoryRegion * const pam = (PAMMemoryRegion *)opaque;
+
+switch (size) {
+case 1:
+stb_phys(&pam->ram_as, pam->offset + addr, val);
+break;
+case 2:
+stw_le_phys(&pam->ram_as, pam->offset + addr, val);
+break;
+case 4:
+stl_le_phys(&pam->ram_as, pam->offset + addr, val);
+break;
+case 8:
+stq_le_phys(&pam->ram_as, pam->offset + addr, val);
+break;
+default:
+g_assert_not_reached();
+}
+}
+
+static const MemoryRegionOps pam_rmem_ops = {
+.write = pam_rmem_write,
+};
+
+static const MemoryRegionOps pam_wmem_ops = {
+.read = pam_wmem_read,
+.write = pam_wmem_write,
+.valid = {
+.min_access_size = 1,
+.max_access_size = 8,
+.unaligned = true,
+},
+.impl = {
+.min_access_size = 1,
+.max_access_size = 8,
+.unaligned = true,
+},
+};
+
 void init_pam(DeviceState *dev, MemoryRegion *ram_memory,
-  MemoryRegion *system_memory, MemoryRegion *pci_address_space,
-  PAMMemoryRegion *mem, uint32_t start, uint32_t size)
+  MemoryRegion *system, MemoryRegion *pci,
+  PAMMemoryRegion *pam, uint32_t start, uint32_t size)
 {
+char name[12] = "pam-splitr";
 int i;
 
-/* RAM */
-memory_region_init_alias(&mem->alias[3], OBJECT(dev), "pam-ram", 
ram_memory,
- start, size);
-/* ROM (XXX: not quite correct) */
-memory_region_init_alias(&mem->alias[1], OBJECT(dev), "pam-rom", 
ram_memory,
- start, size);
-memory_region_set_readonly(&mem->alias[1], true);
+name[10] = (start >> 14) + 17;
+name[11] = '\0';
+
+/* Forward all memory accesses to the bus.  */
+memory_region_init_alias(&pam->alias[0], OBJECT(dev), "pam-pci",
+ pci, start, size);
 
-/* XXX: should distinguish read/write cases */
-memory_region_init_alias(&mem->alias[0], OBJECT(dev), "pam-pci", 
pci_address_space,
- start, size);
-memory_region_init_alias(&mem->alias[2], OBJECT(dev), "pam-pci", 
ram_memory,
- start, size);
+/* Split modes */
+/* Forward reads to RAM, writes to the bus.  */
+memory_region_init_rom_device(&pam->alias[1], OBJECT(dev),
+  &pam_rmem_ops, pam, name, size,
+  &error_fatal);
+
+/* Forward writes to RAM, reads to the bus.  */
+name[9] = 'w'

Re: [PATCH] tests/vm: do not specify -bios option

2022-06-16 Thread Paolo Bonzini

On 6/16/22 10:44, Daniel P. Berrangé wrote:

On Thu, Jun 16, 2022 at 10:30:25AM +0200, Paolo Bonzini wrote:

When running from the build tree, the executable is able to find
the BIOS on its own; when running from the source tree, a firmware
blob should already be installed and there is no guarantee that
the one in the source tree works with the QEMU that is being used for
the installation.

I think there is interaction with

   
--firmwarepath=/usr/share/qemu-firmware:/usr/share/ipxe/qemu:/usr/share/seavgabios:/usr/share/seabios:/usr/share/sgabios

as the firmware path listed there will be searched before the local
build tree.

I agree with your desire to remove the -bios option, but I'm not
sure it is entirely safe unless logic in qemu_find_file is fixed
to ignore the global search path when running from the build
tree.


Isn't this the same for any other invocation of QEMU, for example in qtest?

I admit I didn't think of the interaction with --firmwarepath, but "if 
it hurts, don't do it" might apply here.  That is, install compatible 
firmware to the path _before_ trying to use a QEMU that specifies that path.


Thanks,

Paolo



Re: [PATCH 0/4] Multiple interface support on top of Multi-FD

2022-06-16 Thread manish.mishra



On 16/06/22 1:46 pm, Daniel P. Berrangé wrote:

On Wed, Jun 15, 2022 at 08:14:26PM +0100, Dr. David Alan Gilbert wrote:

* Daniel P. Berrangé (berra...@redhat.com) wrote:

On Fri, Jun 10, 2022 at 05:58:31PM +0530, manish.mishra wrote:

On 09/06/22 9:17 pm, Daniel P. Berrangé wrote:

On Thu, Jun 09, 2022 at 07:33:01AM +, Het Gala wrote:

As of now, the multi-FD feature supports connection over the default network
only. This Patchset series is a Qemu side implementation of providing multiple
interfaces support for multi-FD. This enables us to fully utilize dedicated or
multiple NICs in case bonding of NICs is not possible.


Introduction
-
Multi-FD Qemu implementation currently supports connection only on the default
network. This forbids us from advantages like:
- Separating VM live migration traffic from the default network.

Hi Daniel,

I totally understand your concern around this approach increasing compexity 
inside qemu,

when similar things can be done with NIC teaming. But we thought this approach 
provides

much more flexibility to user in few cases like.

1. We checked our customer data, almost all of the host had multiple NIC, but 
LACP support

     in their setups was very rare. So for those cases this approach can help 
in utilise multiple

     NICs as teaming is not possible there.

AFAIK,  LACP is not required in order to do link aggregation with Linux.
Traditional Linux bonding has no special NIC hardware or switch requirements,
so LACP is merely a "nice to have" in order to simplify some aspects.

IOW, migration with traffic spread across multiple NICs is already
possible AFAICT.

Are we sure that works with multifd?  I've seen a lot of bonding NIC
setups which spread based on a hash of source/destination IP and port
numbers; given that we use the same dest port and IP at the moment what
happens in reality?  That hashing can be quite delicate for high
bandwidth single streams.

The simplest Linux bonding mode does per-packet round-robin across
NICs, so traffic from the collection of multifd connections should
fill up all the NICs in the bond. There are of course other modes
which may be sub-optimal for the reasons you describe. Which mode
to pick depends on the type of service traffic patterns you're
aiming to balance.


My understanding on networking is not good enough so apologies in advance if 
something

does not make sense. As per my understanding it is easy to do load balancing on 
sender

side because we have full control where to send packet but complicated on 
receive side

if we do not have LACP like support. I see there are some teaming technique 
which does

load balancing of incoming traffic by possibly sending different slaves mac 
address on arp

requests but that does not work for our use case and may require a complicated 
setup

for proper usage. Our use case can be something like this e.g. both source and 
destination

has 2-2 NICs of 10Gbps each and we want to get a throughput of 20Gbps for live 
migration.

thanks

Manish Mishra




Multi-interface with Multi-FD
-
Multiple-interface support over basic multi-FD has been implemented in the
patches. Advantages of this implementation are:
- Able to separate live migration traffic from default network interface by
creating multiFD channels on ip addresses of multiple non-default 
interfaces.
- Can optimize the number of multi-FD channels on a particular interface
depending upon the network bandwidth limit on a particular interface.

Manually assigning individual channels to different NICs is a pretty
inefficient way to optimizing traffic. Feels like you could easily get
into a situation where one NIC ends up idle while the other is busy,
especially if the traffic patterns are different. For example with
post-copy there's an extra channel for OOB async page requests, and
its far from clear that manually picking NICs per chanel upfront is
going work for that.  The kernel can continually dynamically balance
load on the fly and so do much better than any static mapping QEMU
tries to apply, especially if there are multiple distinct QEMU's
competing for bandwidth.


Yes, Daniel current solution is only for pre-copy. As with postcopy
multiFD is not yet supported but in future we can extend it for postcopy

I had been thinking about explicit selection of network device for NUMA
use though; ideally I'd like to be able to associate a set of multifd
threads to each NUMA node, and then associate a NIC with that set of
threads; so that the migration happens down the NIC that's on the node
the RAM is on.  On a really good day you'd have one NIC per top level
NUMA node.

Now that's an interesting idea, and not one that can be dealt with
by bonding, since the network layer won't be aware of the NUMA
affinity constraints.


With regards,
Daniel




[PATCH 3/8] migration: Add vmstate part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Now we can disable and enable the vmstate part via the stream_content parameter.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 10 --
 migration/savevm.c| 15 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 4adcc87d1d..bbf9b6aad1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1334,9 +1334,15 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 }
 }
 
-static bool check_stream_parts(strList *stream_content_list)
+static bool check_stream_parts(strList *stream_list)
 {
-/* To be implemented in ext commits */
+for (; stream_list; stream_list = stream_list->next) {
+if (!strcmp(stream_list->value, "vmstate")) {
+continue;
+}
+
+return false;
+}
 return true;
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index c68f187ef7..48603517ba 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -949,6 +949,21 @@ static bool should_skip(SaveStateEntry *se)
 return true;
 }
 
+/*
+ * Assume that any SaveStateEntry with non-null vmsd is
+ * part of vmstate.
+ *
+ *
+ * Vmstate is included by default so firstly check if
+ * stream-content-list is enabled.
+ */
+
+if (se->vmsd &&
+migrate_get_current()->parameters.has_stream_content_list &&
+!migrate_find_stream_content("vmstate")) {
+return true;
+}
+
 return false;
 }
 
-- 
2.31.1




[PATCH 7/8] migration: analyze-migration script changed

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This script is used for the RAM capabilities test. But it cannot work
when there is no vm description in the migration stream.
So a new flag is added to allow this script to work with a ram-only
migration stream.

Signed-off-by: Nikita Lapshin 
---
 scripts/analyze-migration.py | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index b82a1b0c58..80077a09bc 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -495,7 +495,7 @@ def __init__(self, filename):
 self.filename = filename
 self.vmsd_desc = None
 
-def read(self, desc_only = False, dump_memory = False, write_memory = 
False):
+def read(self, ram_only, desc_only = False, dump_memory = False, 
write_memory = False):
 # Read in the whole file
 file = MigrationFile(self.filename)
 
@@ -509,7 +509,8 @@ def read(self, desc_only = False, dump_memory = False, 
write_memory = False):
 if data != self.QEMU_VM_FILE_VERSION:
 raise Exception("Invalid version number %d" % data)
 
-self.load_vmsd_json(file)
+if not ram_only:
+self.load_vmsd_json(file)
 
 # Read sections
 self.sections = collections.OrderedDict()
@@ -518,7 +519,10 @@ def read(self, desc_only = False, dump_memory = False, 
write_memory = False):
 return
 
 ramargs = {}
-ramargs['page_size'] = self.vmsd_desc['page_size']
+if ram_only:
+ramargs['page_size'] = 4096
+else:
+ramargs['page_size'] = self.vmsd_desc['page_size']
 ramargs['dump_memory'] = dump_memory
 ramargs['write_memory'] = write_memory
 self.section_classes[('ram',0)][1] = ramargs
@@ -579,6 +583,7 @@ def default(self, o):
 parser.add_argument("-m", "--memory", help='dump RAM contents as well', 
action='store_true')
 parser.add_argument("-d", "--dump", help='what to dump ("state" or "desc")', 
default='state')
 parser.add_argument("-x", "--extract", help='extract contents into individual 
files', action='store_true')
+parser.add_argument("--ram-only", help='parse migration dump containing only 
RAM', action='store_true')
 args = parser.parse_args()
 
 jsonenc = JSONEncoder(indent=4, separators=(',', ': '))
@@ -586,14 +591,14 @@ def default(self, o):
 if args.extract:
 dump = MigrationDump(args.file)
 
-dump.read(desc_only = True)
+dump.read(desc_only = True, ram_only = args.ram_only)
 print("desc.json")
 f = open("desc.json", "w")
 f.truncate()
 f.write(jsonenc.encode(dump.vmsd_desc))
 f.close()
 
-dump.read(write_memory = True)
+dump.read(write_memory = True, ram_only = args.ram_only)
 dict = dump.getDict()
 print("state.json")
 f = open("state.json", "w")
@@ -602,12 +607,12 @@ def default(self, o):
 f.close()
 elif args.dump == "state":
 dump = MigrationDump(args.file)
-dump.read(dump_memory = args.memory)
+dump.read(dump_memory = args.memory, ram_only = args.ram_only)
 dict = dump.getDict()
 print(jsonenc.encode(dict))
 elif args.dump == "desc":
 dump = MigrationDump(args.file)
-dump.read(desc_only = True)
+dump.read(desc_only = True, ram_only = args.ram_only)
 print(jsonenc.encode(dump.vmsd_desc))
 else:
 raise Exception("Please specify either -x, -d state or -d desc")
-- 
2.31.1




Re: [PATCH v4 5/7] 9pfs: fix 'Twalk' to only send error if no component walked

2022-06-16 Thread Greg Kurz
On Wed, 15 Jun 2022 18:36:46 +0200
Christian Schoenebeck  wrote:

> On Mittwoch, 15. Juni 2022 17:52:49 CEST Greg Kurz wrote:
> > On Tue, 15 Mar 2022 11:08:39 +0100
> > 
> > Christian Schoenebeck  wrote:
> > > Current implementation of 'Twalk' request handling always sends an
> > > 'Rerror'
> > > 
> > > response if any error occured. The 9p2000 protocol spec says though:
> > >   "
> > >   If the first element cannot be walked for any reason, Rerror is
> > >   returned.
> > >   Otherwise, the walk will return an Rwalk message containing nwqid qids
> > >   corresponding, in order, to the files that are visited by the nwqid
> > >   successful elementwise walks; nwqid is therefore either nwname or the
> > >   index
> > >   of the first elementwise walk that failed.
> > >   "
> > >   
> > >   http://ericvh.github.io/9p-rfc/rfc9p2000.html#anchor33
> > > 
> > > For that reason we are no longer leaving from an error path in function
> > > v9fs_walk(), unless really no path component could be walked successfully
> > > or if the request has been interrupted.
> > > 
> > > Local variable 'nwalked' counts and reflects the number of path components
> > > successfully processed by background I/O thread, whereas local variable
> > > 'name_idx' subsequently counts and reflects the number of path components
> > > eventually accepted successfully by 9p server controller portion.
> > > 
> > > New local variable 'any_err' is an aggregate variable reflecting whether
> > > any error occurred at all, while already existing variable 'err' only
> > > reflects the last error.
> > > 
> > > Despite QIDs being delivered to client in a more relaxed way now, it is
> > > important to note though that fid still must remain unaffected if any
> > > error
> > > occurred.
> > > 
> > > Signed-off-by: Christian Schoenebeck 
> > > ---
> > > 
> > >  hw/9pfs/9p.c | 43 +++
> > >  1 file changed, 27 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> > > index 298f4e6548..e770972a71 100644
> > > --- a/hw/9pfs/9p.c
> > > +++ b/hw/9pfs/9p.c
> > > @@ -1766,7 +1766,7 @@ static void coroutine_fn v9fs_walk(void *opaque)
> > > 
> > >  {
> > >  
> > >  int name_idx, nwalked;
> > >  g_autofree V9fsQID *qids = NULL;
> > > 
> > > -int i, err = 0;
> > > +int i, err = 0, any_err = 0;
> > > 
> > >  V9fsPath dpath, path;
> > >  P9ARRAY_REF(V9fsPath) pathes = NULL;
> > >  uint16_t nwnames;
> > > 
> > > @@ -1832,19 +1832,20 @@ static void coroutine_fn v9fs_walk(void *opaque)
> > > 
> > >   * driver code altogether inside the following block.
> > >   */
> > >  
> > >  v9fs_co_run_in_worker({
> > > 
> > > +nwalked = 0;
> > > 
> > >  if (v9fs_request_cancelled(pdu)) {
> > > 
> > > -err = -EINTR;
> > > +any_err |= err = -EINTR;
> > 
> > Not super fan of such constructs but I cannot think of anything
> > better.. so be it ! :-)
> 
> Mwa, :( and I thought this was a slick (though probably yet again unorthodox) 
> way to handle aggregate errors.
> 
> [...]
> > > @@ -1874,12 +1875,12 @@ static void coroutine_fn v9fs_walk(void *opaque)
> > > 
> > >  /*
> > >  
> > >   * Handle all the rest of this Twalk request on main thread ...
> > >   */
> > > 
> > > -if (err < 0) {
> > > +if ((err < 0 && !nwalked) || err == -EINTR) {
> > 
> > So this is making an exception to the spec excerpt you're mentioning
> > in the changelog.
> > 
> > EINTR can only come from the v9fs_request_cancelled(pdu) == true case,
> > since QEMU doesn't have signal handlers AFAIK. This would be the result
> > of a TFLUSH , likely to handle ^C from the client side. I guess that in
> > that peculiar case, it quite makes sense to return RERROR/RLERROR instead
> > of the "degraded" RWALK that the end user isn't waiting for. To sum up,
> > TFLUSH behavior prevails on TWALK. Please add a comment though since
> > this isn't super obvious in the spec.
> 
> Yes, everything you said is depicting this exception here precisely, and I 
> agree that it deserves a comment for further clarification, which I'll simply 
> add on my end to avoid the noise.
> 
> Does the following sound good to you?
> 
> "NOTE: -EINTR is an exception where we deviate from the protocol spec and 
> simply send an (R)Lerror response instead of bothering to assemble a 
> (deducted) Rwalk response; because -EINTR is always the result of a Tflush 
> request, so client would no longer wait for a response in this case anyway."
> 

LGTM

> > Apart from that, LGTM.
> > 
> > Reviewed-by: Greg Kurz 
> 
> Thanks for your reviews, much appreciated!
> 

Sorry for the 3-month delay...

Cheers,

--
Greg

> Best regards,
> Christian Schoenebeck
> 
> 




[PATCH 4/8] migration: Add dirty-bitmaps part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This patch enables and disables dirty-bitmaps in the migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index bbf9b6aad1..ad789915ce 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1337,7 +1337,8 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 static bool check_stream_parts(strList *stream_list)
 {
 for (; stream_list; stream_list = stream_list->next) {
-if (!strcmp(stream_list->value, "vmstate")) {
+if (!strcmp(stream_list->value, "vmstate") ||
+!strcmp(stream_list->value, "dirty-bitmaps")) {
 continue;
 }
 
@@ -2501,7 +2502,8 @@ bool migrate_dirty_bitmaps(void)
 
 s = migrate_get_current();
 
-return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS] ||
+   migrate_find_stream_content("dirty-bitmaps");
 }
 
 bool migrate_ignore_shared(void)
-- 
2.31.1




[PATCH 2/8] migration: should_skip() implemented

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

For next changes it is convenient to make all decisions about
sections skipping in one function.

Signed-off-by: Nikita Lapshin 
---
 migration/savevm.c | 54 --
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 02ed94c180..c68f187ef7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -943,6 +943,15 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se,
 return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 }
 
+static bool should_skip(SaveStateEntry *se)
+{
+if (se->ops && se->ops->is_active && !se->ops->is_active(se->opaque)) {
+return true;
+}
+
+return false;
+}
+
 /*
  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
  */
@@ -1207,10 +1216,8 @@ void qemu_savevm_state_setup(QEMUFile *f)
 if (!se->ops || !se->ops->save_setup) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 save_section_header(f, se, QEMU_VM_SECTION_START);
 
@@ -1238,10 +1245,8 @@ int qemu_savevm_state_resume_prepare(MigrationState *s)
 if (!se->ops || !se->ops->resume_prepare) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 ret = se->ops->resume_prepare(s, se->opaque);
 if (ret < 0) {
@@ -1268,8 +1273,7 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 if (!se->ops || !se->ops->save_live_iterate) {
 continue;
 }
-if (se->ops->is_active &&
-!se->ops->is_active(se->opaque)) {
+if (should_skip(se)) {
 continue;
 }
 if (se->ops->is_active_iterate &&
@@ -1337,10 +1341,8 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
 if (!se->ops || !se->ops->save_live_complete_postcopy) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 trace_savevm_section_start(se->idstr, se->section_id);
 /* Section type */
@@ -1374,10 +1376,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile 
*f, bool in_postcopy)
 continue;
 }
 
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 trace_savevm_section_start(se->idstr, se->section_id);
 
@@ -1417,6 +1417,9 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
 trace_savevm_section_skip(se->idstr, se->section_id);
 continue;
 }
+if (should_skip(se)) {
+continue;
+}
 
 trace_savevm_section_start(se->idstr, se->section_id);
 
@@ -1522,10 +1525,8 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t 
threshold_size,
 if (!se->ops || !se->ops->save_live_pending) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 se->ops->save_live_pending(f, se->opaque, threshold_size,
res_precopy_only, res_compatible,
@@ -1635,6 +1636,9 @@ int qemu_save_device_state(QEMUFile *f)
 if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
 continue;
 }
+if (should_skip(se)) {
+continue;
+}
 
 save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
@@ -2542,10 +2546,8 @@ static int qemu_loadvm_state_setup(QEMUFile *f)
 if (!se->ops || !se->ops->load_setup) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 
 ret = se->ops->load_setup(f, se->opaque);
-- 
2.31.1




[PATCH 6/8] migration: Add RAM part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

The 'ram' parameter enables RAM sections in the migration stream. If it
isn't specified, they will be skipped.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 17 -
 migration/migration.h |  1 +
 migration/ram.c   |  6 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index d81f3c6891..6528b3ad41 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1339,7 +1339,8 @@ static bool check_stream_parts(strList *stream_list)
 for (; stream_list; stream_list = stream_list->next) {
 if (!strcmp(stream_list->value, "vmstate") ||
 !strcmp(stream_list->value, "dirty-bitmaps") ||
-!strcmp(stream_list->value, "block")) {
+!strcmp(stream_list->value, "block") ||
+!strcmp(stream_list->value, "ram")) {
 continue;
 }
 
@@ -2653,6 +2654,20 @@ bool migrate_background_snapshot(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
 }
 
+bool migrate_ram(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+/*
+ * By default RAM is enabled so if stream-content-list disabled
+ * RAM will be passed.
+ */
+return !s->parameters.has_stream_content_list ||
+   migrate_find_stream_content("ram");
+}
+
 /* Checks if stream-content parameter has section_name in list */
 bool migrate_find_stream_content(const char *section_name)
 {
diff --git a/migration/migration.h b/migration/migration.h
index 411c58e919..5c43788a2b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -395,6 +395,7 @@ int migrate_decompress_threads(void);
 bool migrate_use_events(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_background_snapshot(void);
+bool migrate_ram(void);
 
 bool migrate_find_stream_content(const char *section_name);
 
diff --git a/migration/ram.c b/migration/ram.c
index 170e522a1f..ddc7abd08a 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -4263,6 +4263,11 @@ static int ram_resume_prepare(MigrationState *s, void 
*opaque)
 return 0;
 }
 
+static bool is_ram_active(void *opaque)
+{
+return migrate_ram();
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
 .save_setup = ram_save_setup,
 .save_live_iterate = ram_save_iterate,
@@ -4275,6 +4280,7 @@ static SaveVMHandlers savevm_ram_handlers = {
 .load_setup = ram_load_setup,
 .load_cleanup = ram_load_cleanup,
 .resume_prepare = ram_resume_prepare,
+.is_active = is_ram_active,
 };
 
 static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
-- 
2.31.1




[PATCH 0/8] New parameter for migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

We want to implement an external bg-snapshot tool for saving RAM. For this it
is important to be able to manage the migration stream because the tool has no
idea about the non-RAM part and how to parse it.

This parameter provides a guarantee about the migration content. Now there are 4
parts of the migration stream which can be specified: "ram", "block",
"dirty-bitmaps", "vmstate". "vmstate" can be any section whose handler has a
vmdesc. "block" and "dirty-bitmaps" are implemented just like the existing
capabilities.

This way of specifying can be extended on future parts of migration.

Nikita Lapshin (8):
  migration: Implemented new parameter stream_content
  migration: should_skip() implemented
  migration: Add vmstate part of migration stream
  migration: Add dirty-bitmaps part of migration stream
  migration: Add block part of migration stream
  migration: Add RAM part of migration stream
  migration: analyze-migration script changed
  migration: Test for RAM and vmstate parts

 migration/migration.c | 76 ++-
 migration/migration.h |  3 +
 migration/ram.c   |  6 ++
 migration/savevm.c| 69 -
 qapi/migration.json   | 21 +++-
 scripts/analyze-migration.py  | 19 ++--
 .../tests/migrate-ram-stream-content-test | 96 +++
 .../tests/migrate-ram-stream-content-test.out |  5 +
 8 files changed, 256 insertions(+), 39 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/migrate-ram-stream-content-test
 create mode 100644 tests/qemu-iotests/tests/migrate-ram-stream-content-test.out

-- 
2.31.1




[PATCH 4/8] migration: Add dirty-bitmaps part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This patch enables and disables dirty-bitmaps in the migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index bbf9b6aad1..ad789915ce 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1337,7 +1337,8 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 static bool check_stream_parts(strList *stream_list)
 {
 for (; stream_list; stream_list = stream_list->next) {
-if (!strcmp(stream_list->value, "vmstate")) {
+if (!strcmp(stream_list->value, "vmstate") ||
+!strcmp(stream_list->value, "dirty-bitmaps")) {
 continue;
 }
 
@@ -2501,7 +2502,8 @@ bool migrate_dirty_bitmaps(void)
 
 s = migrate_get_current();
 
-return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS] ||
+   migrate_find_stream_content("dirty-bitmaps");
 }
 
 bool migrate_ignore_shared(void)
-- 
2.31.1




[PATCH 5/8] Add block part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This patch enables and disables block in the migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ad789915ce..d81f3c6891 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1338,7 +1338,8 @@ static bool check_stream_parts(strList *stream_list)
 {
 for (; stream_list; stream_list = stream_list->next) {
 if (!strcmp(stream_list->value, "vmstate") ||
-!strcmp(stream_list->value, "dirty-bitmaps")) {
+!strcmp(stream_list->value, "dirty-bitmaps") ||
+!strcmp(stream_list->value, "block")) {
 continue;
 }
 
@@ -2621,7 +2622,8 @@ bool migrate_use_block(void)
 
 s = migrate_get_current();
 
-return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
+return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK] ||
+   migrate_find_stream_content("block");
 }
 
 bool migrate_use_return_path(void)
-- 
2.31.1




[PATCH v3 03/17] migration: Add vmstate part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Now we can disable and enable vmstate part by stream_content parameter.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 10 --
 migration/savevm.c| 15 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 4adcc87d1d..bbf9b6aad1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1334,9 +1334,15 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 }
 }
 
-static bool check_stream_parts(strList *stream_content_list)
+static bool check_stream_parts(strList *stream_list)
 {
-/* To be implemented in ext commits */
+for (; stream_list; stream_list = stream_list->next) {
+if (!strcmp(stream_list->value, "vmstate")) {
+continue;
+}
+
+return false;
+}
 return true;
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index c68f187ef7..48603517ba 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -949,6 +949,21 @@ static bool should_skip(SaveStateEntry *se)
 return true;
 }
 
+/*
+ * Assume that any SaveStateEntry with non-null vmsd is
+ * part of vmstate.
+ *
+ *
+ * Vmstate is included by default so firstly check if
+ * stream-content-list is enabled.
+ */
+
+if (se->vmsd &&
+migrate_get_current()->parameters.has_stream_content_list &&
+!migrate_find_stream_content("vmstate")) {
+return true;
+}
+
 return false;
 }
 
-- 
2.31.1




[PATCH 1/8] migration: Implemented new parameter stream_content

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This new optional parameter contains information about the migration
stream parts to be sent (such as RAM, block, bitmap). This looks
better than using capabilities to solve the problem of dividing the
migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 47 ++-
 migration/migration.h |  2 ++
 qapi/migration.json   | 21 ---
 3 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 695f0f2900..4adcc87d1d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1334,6 +1334,12 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 }
 }
 
+static bool check_stream_parts(strList *stream_content_list)
+{
+/* To be implemented in ext commits */
+return true;
+}
+
 /*
  * Check whether the parameters are valid. Error will be put into errp
  * (if provided). Return true if valid, otherwise false.
@@ -1482,7 +1488,12 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 return false;
 }
 
-return true;
+if (params->has_stream_content_list &&
+!check_stream_parts(params->stream_content_list)) {
+error_setg(errp, "Invalid parts of stream given for stream-content");
+}
+
+   return true;
 }
 
 static void migrate_params_test_apply(MigrateSetParameters *params,
@@ -1581,6 +1592,11 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_stream_content_list) {
+dest->has_stream_content_list = true;
+dest->stream_content_list = params->stream_content_list;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1703,6 +1719,13 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+
+if (params->has_stream_content_list) {
+qapi_free_strList(s->parameters.stream_content_list);
+s->parameters.has_stream_content_list = true;
+s->parameters.stream_content_list =
+QAPI_CLONE(strList, params->stream_content_list);
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -2620,6 +2643,28 @@ bool migrate_background_snapshot(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
 }
 
+/* Checks if stream-content parameter has section_name in list */
+bool migrate_find_stream_content(const char *section_name)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+if (!s->parameters.has_stream_content_list) {
+return false;
+}
+
+strList *list = s->parameters.stream_content_list;
+
+for (; list; list = list->next) {
+if (!strcmp(list->value, section_name)) {
+return true;
+}
+}
+
+return false;
+}
+
 /* migration thread support */
 /*
  * Something bad happened to the RP stream, mark an error
diff --git a/migration/migration.h b/migration/migration.h
index 2de861df01..411c58e919 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -396,6 +396,8 @@ bool migrate_use_events(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_background_snapshot(void);
 
+bool migrate_find_stream_content(const char *section_name);
+
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
   uint32_t value);
diff --git a/qapi/migration.json b/qapi/migration.json
index 18e2610e88..80acf6dbc3 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -760,6 +760,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2)
 #
+# @stream-content-list: Parameter control content of migration stream such as 
RAM,
+#   vmstate, block and dirty-bitmaps. This is optional 
parameter
+#   so migration will work correctly without it.
+#   This parameter takes string list as description of 
content
+#   and include that part of migration stream. (Since 7.0)
+#
 # Features:
 # @unstable: Member @x-checkpoint-delay is experimental.
 #
@@ -780,7 +786,8 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
-   'block-bitmap-mapping' ] }
+   'block-bitmap-mapping',
+   'stream-content-list' ] }
 
 ##
 # @MigrateSetParameters:
@@ -925,6 +932,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2

[PATCH v3 00/17] migration/snapshot: External snapshot utility

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Changes v2 -> v3
 * Refactored tool code to decrease duplications of migration code.
 * Used sequential migration for saving, which means that the vmstate
   will be sent first and only after that will the ram part be sent.
   For this purpose the stream-content-list parameter was used.
 * Removed aio work with block driver. Should be replaced with
   existing in qcow2 format in next versions.
 * Removed postcopy percent. Should be added in next versions.

Changes v1 -> v2:
 * Fixed CI checks

Changes v0 -> v1:
 * Changed command-line format, now use blockdev specification to
   define vmstate image.
 * Don't deal with image creation in the tool, create externally.
 * Better block layer AIO handling in the load path.
 * Reduced fragmentation of the image backing file by using 'written-slice'
   bitmaps in RAM blocks. Zero block write is issued to a never written slice
   before the actual memory page write takes place.
 * Improved load performance in postcopy by using 'loaded-slice' bitmaps
   in RAM blocks.
 * Refactored error handling/messages.
 * Refactored namings.

This series is a kind of PoC for asynchronous snapshot reverting. It is
about external snapshots only and doesn't involve block devices. Thus, it's
mainly intended to be used with the new 'background-snapshot' migration
capability and otherwise standard QEMU migration mechanism.

The major ideas behind this version were:
  * Make it compatible with 'exec:'-style migration - options can be create
some separate tool or integrate into qemu-system.
  * Support asynchronous revert stage by using unaltered postcopy logic
at destination. To do this, we should be capable of saving RAM pages
so that any particular page can be directly addressed by it's block ID
and page offset. Possible solutions here seem to be:
  use separate index (and storing it somewhere)
  create sparse file on host FS and address pages with file offset
  use QCOW2 (or other) image container with inherent sparsity support
  * Make image file dense on the host FS so we don't depend on
copy/backup tools and how they deal with sparse files. Off course,
there's some performance cost for this choice.
  * Try to keep page save latencies small while not degrading migration
bandwidth too much.

This version of snapshot-tool is the first step to integrate the tool into
main QEMU. Now the tool replaces ram handlers so it can call existing functions
in the migration/* part to parse the migration stream.

For the storage format, QCOW2 as a container and large (1MB) cluster size seem
to be an optimal choice. Larger cluster is beneficial for performance 
particularly
in the case when image preallocation is disabled. Such cluster size does not 
result
in too high internal fragmentation level (~10% of space waste in most cases) yet
allows to reduce significantly the number of expensive cluster allocations.

"stream-content-list"
There was no strict guarantee that there are no sections in the ram part
other than ram. So to solve this problem we decided to implement the
stream-content-list parameter to provide such a guarantee strictly.
This decision also helps with reusing the existing migration code.
You can see it in tool load part where tool disables all handlers except
ram using this parameter. If you have already seen it in previous patches you
can skip first 8 commits.

"sequential migration"
One problem remains unsolved. We need to run two migrations: first to
save the vmstate and second to save the ram. We cannot run migration if the
VM is in the postmigrate state. But if we want to make a snapshot this
prohibition is unnecessary, so I changed some parts of migration and softmmu
so that sequential migration becomes permitted. But that is not a complete
solution. Maybe a new capability should be implemented for that purpose.

Some of the upgrades were removed for now. This happened because of refactoring
and should be implemented in next versions.

How to use:

**Save:**
* > qemu-img create -f qcow2 -o size=<2_x_ram_size>,cluster_size=1M,
   preallocation=off,refcount_bits=8 
* qemu> migrate_set_capability background-snapshot on
* #set SCL to "vmstate" only
* qemu> migrate "exec:qemu-snapshot --save-vmstate
   ,cache.direct=off,file.aio=threads"
* #set SCL to "ram" only
* qemu> migrate "exec:qemu-snapshot
   ,cache.direct=off,file.aio=threads" 

**Load:**
* Use 'qemu-system-* -incoming defer'
* qemu> migrate_incoming "exec:qemu-snapshot --revert
   ,cache.direct=on,file.aio=native"

**Load with postcopy:**
* Use 'qemu-system-* -incoming defer'
* qemu> migrate_set_capability postcopy-ram on
* qemu> migrate_incoming "exec:qemu-snapshot --revert --postcopy
   ,cache.direct=on,file.aio=native"

Nikita Lapshin (17):
  migration: Implemented new parameter stream_content
  migration: should_skip() implemented
  migration: Add vmstate part of migration stream
  migration: Add dirty-bitmaps part of migration stream
  Add block part of migration stream
  migration: Add RA

[PATCH v3 04/17] migration: Add dirty-bitmaps part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This patch enables and disables dirty-bitmaps in the migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index bbf9b6aad1..ad789915ce 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1337,7 +1337,8 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 static bool check_stream_parts(strList *stream_list)
 {
 for (; stream_list; stream_list = stream_list->next) {
-if (!strcmp(stream_list->value, "vmstate")) {
+if (!strcmp(stream_list->value, "vmstate") ||
+!strcmp(stream_list->value, "dirty-bitmaps")) {
 continue;
 }
 
@@ -2501,7 +2502,8 @@ bool migrate_dirty_bitmaps(void)
 
 s = migrate_get_current();
 
-return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS] ||
+   migrate_find_stream_content("dirty-bitmaps");
 }
 
 bool migrate_ignore_shared(void)
-- 
2.31.1




[PATCH 8/8] migration: Test for RAM and vmstate parts

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

All other parts work just like the existing capabilities. Thus there is
no need to write new tests for them. However, RAM and vmstate are new, so
here is a new test for those parts.

Signed-off-by: Nikita Lapshin 
---
 .../tests/migrate-ram-stream-content-test | 96 +++
 .../tests/migrate-ram-stream-content-test.out |  5 +
 2 files changed, 101 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/migrate-ram-stream-content-test
 create mode 100644 tests/qemu-iotests/tests/migrate-ram-stream-content-test.out

diff --git a/tests/qemu-iotests/tests/migrate-ram-stream-content-test 
b/tests/qemu-iotests/tests/migrate-ram-stream-content-test
new file mode 100755
index 00..2855ca4a64
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-ram-stream-content-test
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# group: rw migration
+#
+# Tests for 'no-ram' and 'ram-only' capabilities
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import json
+import subprocess
+import iotests
+
+img = os.path.join(iotests.test_dir, 'disk.img')
+
+class TestRamCapabilities(iotests.QMPTestCase):
+def setUp(self):
+iotests.qemu_img('create', '-f', iotests.imgfmt, img, '10M')
+self.vm = iotests.VM()
+self.vm.launch()
+self.vm.qmp('migrate-set-capabilities', capabilities=[
+{
+'capability': 'events',
+'state': True
+}
+])
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(img)
+
+def check_ram_only(self, output):
+str_json = output.decode()
+json_obj = json.loads(str_json)
+
+success = False
+for key in json_obj:
+self.assertTrue("ram" in key)
+success = True
+self.assertTrue(success)
+
+def run_migration(self, no_ram, tmp_stream):
+if no_ram:
+output = self.vm.qmp('migrate-set-parameters',
+stream_content_list = ['vmstate'])
+else:
+self.vm.qmp('migrate-set-parameters',
+stream_content_list = ['ram'])
+
+self.vm.qmp('migrate', uri='exec:cat>' + tmp_stream)
+
+while True:
+event = self.vm.event_wait('MIGRATION')
+
+if event['data']['status'] == 'completed':
+break
+
+
+def test_no_ram(self):
+with iotests.FilePath('tmp_stream') as tmp_stream:
+self.run_migration(True, tmp_stream)
+output = subprocess.run(
+['../../../scripts/analyze-migration.py', '-f', tmp_stream],
+stdout=subprocess.PIPE,
+stderr=subprocess.STDOUT,
+check=False).stdout
+
+self.assertFalse('ram' in output.decode())
+
+def test_ram_only(self):
+with iotests.FilePath('tmp_stream') as tmp_stream:
+self.run_migration(False, tmp_stream)
+output = subprocess.run(
+['../../../scripts/analyze-migration.py', '-f', tmp_stream,
+'--ram-only'],
+stdout=subprocess.PIPE,
+stderr=subprocess.STDOUT,
+check=False).stdout
+
+self.check_ram_only(output)
+
+if __name__ == '__main__':
+iotests.main(supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/migrate-ram-stream-content-test.out 
b/tests/qemu-iotests/tests/migrate-ram-stream-content-test.out
new file mode 100644
index 00..fbc63e62f8
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-ram-stream-content-test.out
@@ -0,0 +1,5 @@
+..
+--
+Ran 2 tests
+
+OK
-- 
2.31.1




[PATCH v3 05/17] migration: Add block part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This patch enables and disables block in the migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ad789915ce..d81f3c6891 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1338,7 +1338,8 @@ static bool check_stream_parts(strList *stream_list)
 {
 for (; stream_list; stream_list = stream_list->next) {
 if (!strcmp(stream_list->value, "vmstate") ||
-!strcmp(stream_list->value, "dirty-bitmaps")) {
+!strcmp(stream_list->value, "dirty-bitmaps") ||
+!strcmp(stream_list->value, "block")) {
 continue;
 }
 
@@ -2621,7 +2622,8 @@ bool migrate_use_block(void)
 
 s = migrate_get_current();
 
-return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
+return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK] ||
+   migrate_find_stream_content("block");
 }
 
 bool migrate_use_return_path(void)
-- 
2.31.1




[PATCH v3 10/17] migration/snapshot: Build changes for qemu-snapshot-tool

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Tool uses part of migration code thus new lib with stubs for migration
code is added here.

Signed-off-by: Nikita Lapshin 
---
 meson.build   | 18 ++
 migration/meson.build |  3 ++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 2d6601467f..ba3b9ab4bd 100644
--- a/meson.build
+++ b/meson.build
@@ -3287,6 +3287,16 @@ if xkbcommon.found()
 endif
 
 if have_tools
+  tool_inc = include_directories('include/hw/core', 'include')
+  lib_tool = static_library('snapshot-tool',
+sources: [migration_files],
+dependencies: arch_deps,
+include_directories: [tool_inc, target_inc],
+name_suffix: 'fa',
+build_by_default: false)
+  snapshot_tool = declare_dependency(link_with: lib_tool,
+   dependencies: [zlib, qom, io])
+
   qemu_img = executable('qemu-img', [files('qemu-img.c'), hxdep],
  dependencies: [authz, block, crypto, io, qom, qemuutil], install: 
true)
   qemu_io = executable('qemu-io', files('qemu-io.c'),
@@ -3294,6 +3304,14 @@ if have_tools
   qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
dependencies: [blockdev, qemuutil, gnutls, selinux],
install: true)
+  qemu_snapshot = executable('qemu-snapshot',
+ files('qemu-snapshot.c'),
+ dependencies: [snapshot_tool] + arch_deps + deps,
+ objects: lib.extract_all_objects(recursive: true),
+ link_depends: [block_syms, qemu_syms],
+ link_args: link_args,
+
+install: true)
 
   subdir('storage-daemon')
   subdir('contrib/rdmacm-mux')
diff --git a/migration/meson.build b/migration/meson.build
index 8b5ca5c047..13498a6db3 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -7,6 +7,8 @@ migration_files = files(
   'qemu-file-channel.c',
   'qemu-file.c',
   'yank_functions.c',
+  'migration.c',
+  'qemu-snapshot.c',
 )
 softmmu_ss.add(migration_files)
 
@@ -18,7 +20,6 @@ softmmu_ss.add(files(
   'exec.c',
   'fd.c',
   'global_state.c',
-  'migration.c',
   'multifd.c',
   'multifd-zlib.c',
   'postcopy-ram.c',
-- 
2.31.1




[PATCH 5/8] migration: Add block part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This patch enables and disables block in the migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ad789915ce..d81f3c6891 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1338,7 +1338,8 @@ static bool check_stream_parts(strList *stream_list)
 {
 for (; stream_list; stream_list = stream_list->next) {
 if (!strcmp(stream_list->value, "vmstate") ||
-!strcmp(stream_list->value, "dirty-bitmaps")) {
+!strcmp(stream_list->value, "dirty-bitmaps") ||
+!strcmp(stream_list->value, "block")) {
 continue;
 }
 
@@ -2621,7 +2622,8 @@ bool migrate_use_block(void)
 
 s = migrate_get_current();
 
-return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
+return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK] ||
+   migrate_find_stream_content("block");
 }
 
 bool migrate_use_return_path(void)
-- 
2.31.1




[PATCH v3 07/17] migration: analyze-migration script changed

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This script is used for the RAM capabilities test. But it cannot work
when there is no vm description in the migration stream.
So a new flag is added to allow this script to work with a ram-only
migration stream.

Signed-off-by: Nikita Lapshin 
---
 scripts/analyze-migration.py | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index b82a1b0c58..80077a09bc 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -495,7 +495,7 @@ def __init__(self, filename):
 self.filename = filename
 self.vmsd_desc = None
 
-def read(self, desc_only = False, dump_memory = False, write_memory = 
False):
+def read(self, ram_only, desc_only = False, dump_memory = False, 
write_memory = False):
 # Read in the whole file
 file = MigrationFile(self.filename)
 
@@ -509,7 +509,8 @@ def read(self, desc_only = False, dump_memory = False, 
write_memory = False):
 if data != self.QEMU_VM_FILE_VERSION:
 raise Exception("Invalid version number %d" % data)
 
-self.load_vmsd_json(file)
+if not ram_only:
+self.load_vmsd_json(file)
 
 # Read sections
 self.sections = collections.OrderedDict()
@@ -518,7 +519,10 @@ def read(self, desc_only = False, dump_memory = False, 
write_memory = False):
 return
 
 ramargs = {}
-ramargs['page_size'] = self.vmsd_desc['page_size']
+if ram_only:
+ramargs['page_size'] = 4096
+else:
+ramargs['page_size'] = self.vmsd_desc['page_size']
 ramargs['dump_memory'] = dump_memory
 ramargs['write_memory'] = write_memory
 self.section_classes[('ram',0)][1] = ramargs
@@ -579,6 +583,7 @@ def default(self, o):
 parser.add_argument("-m", "--memory", help='dump RAM contents as well', 
action='store_true')
 parser.add_argument("-d", "--dump", help='what to dump ("state" or "desc")', 
default='state')
 parser.add_argument("-x", "--extract", help='extract contents into individual 
files', action='store_true')
+parser.add_argument("--ram-only", help='parse migration dump containing only 
RAM', action='store_true')
 args = parser.parse_args()
 
 jsonenc = JSONEncoder(indent=4, separators=(',', ': '))
@@ -586,14 +591,14 @@ def default(self, o):
 if args.extract:
 dump = MigrationDump(args.file)
 
-dump.read(desc_only = True)
+dump.read(desc_only = True, ram_only = args.ram_only)
 print("desc.json")
 f = open("desc.json", "w")
 f.truncate()
 f.write(jsonenc.encode(dump.vmsd_desc))
 f.close()
 
-dump.read(write_memory = True)
+dump.read(write_memory = True, ram_only = args.ram_only)
 dict = dump.getDict()
 print("state.json")
 f = open("state.json", "w")
@@ -602,12 +607,12 @@ def default(self, o):
 f.close()
 elif args.dump == "state":
 dump = MigrationDump(args.file)
-dump.read(dump_memory = args.memory)
+dump.read(dump_memory = args.memory, ram_only = args.ram_only)
 dict = dump.getDict()
 print(jsonenc.encode(dict))
 elif args.dump == "desc":
 dump = MigrationDump(args.file)
-dump.read(desc_only = True)
+dump.read(desc_only = True, ram_only = args.ram_only)
 print(jsonenc.encode(dump.vmsd_desc))
 else:
 raise Exception("Please specify either -x, -d state or -d desc")
-- 
2.31.1




Re: [PATCH 5/8] Add block part of migration stream

2022-06-16 Thread Nikita

Sorry, that one wasn't supposed to be sent.




[PATCH v3 14/17] migration/snapshot: Implement API for RAMBlock

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

The implemented RAMBlock is used for managing RAM blocks from the VM.
This structure is close to the existing RAMBlock in migration but
has a few differences.

Maybe it should be replaced with the existing RAMBlock; that could
lead to lots of reuse.

Signed-off-by: Nikita Lapshin 
---
 migration/qemu-snapshot.c | 180 ++
 1 file changed, 180 insertions(+)

diff --git a/migration/qemu-snapshot.c b/migration/qemu-snapshot.c
index f7695e75c7..394c6acb77 100644
--- a/migration/qemu-snapshot.c
+++ b/migration/qemu-snapshot.c
@@ -23,13 +23,193 @@
 #include "migration/ram.h"
 #include "qemu-snapshot.h"
 
+/* RAM block */
+/* TODO RAMBlock should be replace with existing struct RAMBlock in ram.c */
+typedef struct RAMBlock {
+int64_t bdrv_offset;/* Offset on backing storage */
+int64_t length; /* Length */
+int64_t nr_pages;   /* Page count */
+int64_t nr_slices;  /* Number of slices (for bitmap bookkeeping) */
+int64_t discard_offset; /* Used for postcopy dicarding of ram blocks */
+
+unsigned long *bitmap;  /* Bitmap of RAM slices */
+
+/* Link into ram_list */
+QSIMPLEQ_ENTRY(RAMBlock) next;
+
+char idstr[256];/* RAM block id string */
+} RAMBlock;
+
 /* RAM transfer context */
 typedef struct RAMCtx {
 int64_t normal_pages;   /* Total number of normal pages */
+
+/* RAM block list head */
+QSIMPLEQ_HEAD(, RAMBlock) ram_block_list;
+
 } RAMCtx;
 
 static RAMCtx ram_ctx;
 
+static inline
+bool ram_offset_in_block(RAMBlock *block, int64_t offset)
+{
+return block && offset < block->length;
+}
+
+static inline
+bool ram_bdrv_offset_in_block(RAMBlock *block, int64_t bdrv_offset)
+{
+return block && bdrv_offset >= block->bdrv_offset &&
+bdrv_offset < block->bdrv_offset + block->length;
+}
+
+static inline
+int64_t ram_bdrv_from_block_offset(RAMBlock *block, int64_t offset)
+{
+if (!ram_offset_in_block(block, offset)) {
+return INVALID_OFFSET;
+}
+
+return block->bdrv_offset + offset;
+}
+
+static inline
+int64_t ram_block_offset_from_bdrv(RAMBlock *block, int64_t bdrv_offset)
+{
+int64_t offset;
+
+if (!block) {
+return INVALID_OFFSET;
+}
+
+offset = bdrv_offset - block->bdrv_offset;
+return offset >= 0 ? offset : INVALID_OFFSET;
+}
+
+static RAMBlock *ram_block_by_idstr(const char *idstr)
+{
+RAMBlock *block;
+
+QSIMPLEQ_FOREACH(block, &ram_ctx.ram_block_list, next) {
+if (!strcmp(idstr, block->idstr)) {
+return block;
+}
+}
+
+return NULL;
+}
+
+/*
+ * Assume QEMUFile is migration stream and try to get ram block from it.
+ * Also check if this ram block exists.
+ */
+static RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
+{
+static RAMBlock *block;
+char idstr[256];
+
+if (flags & RAM_SAVE_FLAG_CONTINUE) {
+if (!block) {
+error_report("RAM_SAVE_FLAG_CONTINUE outside RAM block");
+return NULL;
+}
+
+return block;
+}
+
+if (!qemu_get_counted_string(f, idstr)) {
+error_report("Failed to get RAM block name");
+return NULL;
+}
+
+block = ram_block_by_idstr(idstr);
+if (!block) {
+error_report("Can't find RAM block %s", idstr);
+return NULL;
+}
+
+return block;
+}
+
+static int64_t ram_block_next_bdrv_offset(void)
+{
+RAMBlock *last_block;
+int64_t offset;
+
+last_block = QSIMPLEQ_LAST(&ram_ctx.ram_block_list, RAMBlock, next);
+if (!last_block) {
+return 0;
+}
+
+offset = last_block->bdrv_offset + last_block->length;
+return ROUND_UP(offset, BDRV_CLUSTER_SIZE);
+}
+
+static void ram_block_add(const char *idstr, int64_t size)
+{
+RAMBlock *block;
+
+block = g_new0(RAMBlock, 1);
+block->length = size;
+block->bdrv_offset = ram_block_next_bdrv_offset();
+strcpy(block->idstr, idstr);
+
+QSIMPLEQ_INSERT_TAIL(&ram_ctx.ram_block_list, block, next);
+}
+
+/*
+ * Assume that QEMUFile is migration stream and try to get
+ * from f_src ram blocks list. mem_size is a total amount of bytes of whole
+ * ram blocks.
+ */
+static int ram_block_list_from_stream(QEMUFile *f_src, int64_t mem_size)
+{
+int64_t total_ram_bytes;
+
+total_ram_bytes = mem_size;
+while (total_ram_bytes > 0) {
+char idstr[256];
+int64_t size;
+
+if (!qemu_get_counted_string(f_src, idstr)) {
+error_report("Failed to get RAM block list");
+return -EINVAL;
+}
+
+size = qemu_get_be64(f_src);
+
+ram_block_add(idstr, size);
+total_ram_bytes -= size;
+}
+
+if (total_ram_bytes != 0) {
+error_report("Corrupted RAM block list");
+return -EINVAL;
+}
+
+return 0;
+}
+
+/* Send ram block list using migration rule */
+static int ram_block_list_to_stream(QEMUFile *f_dest)
+{
+RAMBlock *block;
+uint64_t total = 0;

[PATCH v3 01/17] migration: Implemented new parameter stream_content

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This new optional parameter contains information about the migration
stream parts to be sent (such as RAM, block, bitmap). This looks
better than using capabilities to solve the problem of dividing
the migration stream.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 47 ++-
 migration/migration.h |  2 ++
 qapi/migration.json   | 21 ---
 3 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 695f0f2900..4adcc87d1d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1334,6 +1334,12 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 }
 }
 
+static bool check_stream_parts(strList *stream_content_list)
+{
+/* To be implemented in ext commits */
+return true;
+}
+
 /*
  * Check whether the parameters are valid. Error will be put into errp
  * (if provided). Return true if valid, otherwise false.
@@ -1482,7 +1488,12 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 return false;
 }
 
-return true;
+if (params->has_stream_content_list &&
+!check_stream_parts(params->stream_content_list)) {
+error_setg(errp, "Invalid parts of stream given for stream-content");
+}
+
+   return true;
 }
 
 static void migrate_params_test_apply(MigrateSetParameters *params,
@@ -1581,6 +1592,11 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->has_block_bitmap_mapping = true;
 dest->block_bitmap_mapping = params->block_bitmap_mapping;
 }
+
+if (params->has_stream_content_list) {
+dest->has_stream_content_list = true;
+dest->stream_content_list = params->stream_content_list;
+}
 }
 
 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1703,6 +1719,13 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
 }
+
+if (params->has_stream_content_list) {
+qapi_free_strList(s->parameters.stream_content_list);
+s->parameters.has_stream_content_list = true;
+s->parameters.stream_content_list =
+QAPI_CLONE(strList, params->stream_content_list);
+}
 }
 
 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -2620,6 +2643,28 @@ bool migrate_background_snapshot(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
 }
 
+/* Checks if stream-content parameter has section_name in list */
+bool migrate_find_stream_content(const char *section_name)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+if (!s->parameters.has_stream_content_list) {
+return false;
+}
+
+strList *list = s->parameters.stream_content_list;
+
+for (; list; list = list->next) {
+if (!strcmp(list->value, section_name)) {
+return true;
+}
+}
+
+return false;
+}
+
 /* migration thread support */
 /*
  * Something bad happened to the RP stream, mark an error
diff --git a/migration/migration.h b/migration/migration.h
index 2de861df01..411c58e919 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -396,6 +396,8 @@ bool migrate_use_events(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_background_snapshot(void);
 
+bool migrate_find_stream_content(const char *section_name);
+
 /* Sending on the return path - generic and then for each message type */
 void migrate_send_rp_shut(MigrationIncomingState *mis,
   uint32_t value);
diff --git a/qapi/migration.json b/qapi/migration.json
index 18e2610e88..80acf6dbc3 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -760,6 +760,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2)
 #
+# @stream-content-list: Parameter control content of migration stream such as 
RAM,
+#   vmstate, block and dirty-bitmaps. This is optional 
parameter
+#   so migration will work correctly without it.
+#   This parameter takes string list as description of 
content
+#   and include that part of migration stream. (Since 7.0)
+#
 # Features:
 # @unstable: Member @x-checkpoint-delay is experimental.
 #
@@ -780,7 +786,8 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
-   'block-bitmap-mapping' ] }
+   'block-bitmap-mapping',
+   'stream-content-list' ] }
 
 ##
 # @MigrateSetParameters:
@@ -925,6 +932,12 @@
 #block device name if there is one, and to their node 
name
 #otherwise. (Since 5.2

[PATCH v3 08/17] migration: Test for RAM and vmstate parts

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

All other parts work just like the existing capabilities. Thus there is
no need to make new tests. However, RAM and vmstate are new, so here
is a new test for those parts.

Signed-off-by: Nikita Lapshin 
---
 .../tests/migrate-ram-stream-content-test | 96 +++
 .../tests/migrate-ram-stream-content-test.out |  5 +
 2 files changed, 101 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/migrate-ram-stream-content-test
 create mode 100644 tests/qemu-iotests/tests/migrate-ram-stream-content-test.out

diff --git a/tests/qemu-iotests/tests/migrate-ram-stream-content-test 
b/tests/qemu-iotests/tests/migrate-ram-stream-content-test
new file mode 100755
index 00..2855ca4a64
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-ram-stream-content-test
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# group: rw migration
+#
+# Tests for 'no-ram' and 'ram-only' capabilities
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import json
+import subprocess
+import iotests
+
+img = os.path.join(iotests.test_dir, 'disk.img')
+
+class TestRamCapabilities(iotests.QMPTestCase):
+def setUp(self):
+iotests.qemu_img('create', '-f', iotests.imgfmt, img, '10M')
+self.vm = iotests.VM()
+self.vm.launch()
+self.vm.qmp('migrate-set-capabilities', capabilities=[
+{
+'capability': 'events',
+'state': True
+}
+])
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(img)
+
+def check_ram_only(self, output):
+str_json = output.decode()
+json_obj = json.loads(str_json)
+
+success = False
+for key in json_obj:
+self.assertTrue("ram" in key)
+success = True
+self.assertTrue(success)
+
+def run_migration(self, no_ram, tmp_stream):
+if no_ram:
+output = self.vm.qmp('migrate-set-parameters',
+stream_content_list = ['vmstate'])
+else:
+self.vm.qmp('migrate-set-parameters',
+stream_content_list = ['ram'])
+
+self.vm.qmp('migrate', uri='exec:cat>' + tmp_stream)
+
+while True:
+event = self.vm.event_wait('MIGRATION')
+
+if event['data']['status'] == 'completed':
+break
+
+
+def test_no_ram(self):
+with iotests.FilePath('tmp_stream') as tmp_stream:
+self.run_migration(True, tmp_stream)
+output = subprocess.run(
+['../../../scripts/analyze-migration.py', '-f', tmp_stream],
+stdout=subprocess.PIPE,
+stderr=subprocess.STDOUT,
+check=False).stdout
+
+self.assertFalse('ram' in output.decode())
+
+def test_ram_only(self):
+with iotests.FilePath('tmp_stream') as tmp_stream:
+self.run_migration(False, tmp_stream)
+output = subprocess.run(
+['../../../scripts/analyze-migration.py', '-f', tmp_stream,
+'--ram-only'],
+stdout=subprocess.PIPE,
+stderr=subprocess.STDOUT,
+check=False).stdout
+
+self.check_ram_only(output)
+
+if __name__ == '__main__':
+iotests.main(supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/migrate-ram-stream-content-test.out 
b/tests/qemu-iotests/tests/migrate-ram-stream-content-test.out
new file mode 100644
index 00..fbc63e62f8
--- /dev/null
+++ b/tests/qemu-iotests/tests/migrate-ram-stream-content-test.out
@@ -0,0 +1,5 @@
+..
+--
+Ran 2 tests
+
+OK
-- 
2.31.1




[PATCH v3 02/17] migration: should_skip() implemented

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

For the next changes it is convenient to make all decisions about
section skipping in one function.

Signed-off-by: Nikita Lapshin 
---
 migration/savevm.c | 54 --
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 02ed94c180..c68f187ef7 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -943,6 +943,15 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se,
 return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 }
 
+static bool should_skip(SaveStateEntry *se)
+{
+if (se->ops && se->ops->is_active && !se->ops->is_active(se->opaque)) {
+return true;
+}
+
+return false;
+}
+
 /*
  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
  */
@@ -1207,10 +1216,8 @@ void qemu_savevm_state_setup(QEMUFile *f)
 if (!se->ops || !se->ops->save_setup) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 save_section_header(f, se, QEMU_VM_SECTION_START);
 
@@ -1238,10 +1245,8 @@ int qemu_savevm_state_resume_prepare(MigrationState *s)
 if (!se->ops || !se->ops->resume_prepare) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 ret = se->ops->resume_prepare(s, se->opaque);
 if (ret < 0) {
@@ -1268,8 +1273,7 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 if (!se->ops || !se->ops->save_live_iterate) {
 continue;
 }
-if (se->ops->is_active &&
-!se->ops->is_active(se->opaque)) {
+if (should_skip(se)) {
 continue;
 }
 if (se->ops->is_active_iterate &&
@@ -1337,10 +1341,8 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
 if (!se->ops || !se->ops->save_live_complete_postcopy) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 trace_savevm_section_start(se->idstr, se->section_id);
 /* Section type */
@@ -1374,10 +1376,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile 
*f, bool in_postcopy)
 continue;
 }
 
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 trace_savevm_section_start(se->idstr, se->section_id);
 
@@ -1417,6 +1417,9 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
 trace_savevm_section_skip(se->idstr, se->section_id);
 continue;
 }
+if (should_skip(se)) {
+continue;
+}
 
 trace_savevm_section_start(se->idstr, se->section_id);
 
@@ -1522,10 +1525,8 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t 
threshold_size,
 if (!se->ops || !se->ops->save_live_pending) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 se->ops->save_live_pending(f, se->opaque, threshold_size,
res_precopy_only, res_compatible,
@@ -1635,6 +1636,9 @@ int qemu_save_device_state(QEMUFile *f)
 if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
 continue;
 }
+if (should_skip(se)) {
+continue;
+}
 
 save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
@@ -2542,10 +2546,8 @@ static int qemu_loadvm_state_setup(QEMUFile *f)
 if (!se->ops || !se->ops->load_setup) {
 continue;
 }
-if (se->ops->is_active) {
-if (!se->ops->is_active(se->opaque)) {
-continue;
-}
+if (should_skip(se)) {
+continue;
 }
 
 ret = se->ops->load_setup(f, se->opaque);
-- 
2.31.1




[PATCH v3 06/17] migration: Add RAM part of migration stream

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

The 'ram' parameter enables RAM sections in the migration stream. If it
isn't specified, they will be skipped.

Signed-off-by: Nikita Lapshin 
---
 migration/migration.c | 17 -
 migration/migration.h |  1 +
 migration/ram.c   |  6 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index d81f3c6891..6528b3ad41 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1339,7 +1339,8 @@ static bool check_stream_parts(strList *stream_list)
 for (; stream_list; stream_list = stream_list->next) {
 if (!strcmp(stream_list->value, "vmstate") ||
 !strcmp(stream_list->value, "dirty-bitmaps") ||
-!strcmp(stream_list->value, "block")) {
+!strcmp(stream_list->value, "block") ||
+!strcmp(stream_list->value, "ram")) {
 continue;
 }
 
@@ -2653,6 +2654,20 @@ bool migrate_background_snapshot(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
 }
 
+bool migrate_ram(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+/*
+ * By default RAM is enabled so if stream-content-list disabled
+ * RAM will be passed.
+ */
+return !s->parameters.has_stream_content_list ||
+   migrate_find_stream_content("ram");
+}
+
 /* Checks if stream-content parameter has section_name in list */
 bool migrate_find_stream_content(const char *section_name)
 {
diff --git a/migration/migration.h b/migration/migration.h
index 411c58e919..5c43788a2b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -395,6 +395,7 @@ int migrate_decompress_threads(void);
 bool migrate_use_events(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_background_snapshot(void);
+bool migrate_ram(void);
 
 bool migrate_find_stream_content(const char *section_name);
 
diff --git a/migration/ram.c b/migration/ram.c
index 170e522a1f..ddc7abd08a 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -4263,6 +4263,11 @@ static int ram_resume_prepare(MigrationState *s, void 
*opaque)
 return 0;
 }
 
+static bool is_ram_active(void *opaque)
+{
+return migrate_ram();
+}
+
 static SaveVMHandlers savevm_ram_handlers = {
 .save_setup = ram_save_setup,
 .save_live_iterate = ram_save_iterate,
@@ -4275,6 +4280,7 @@ static SaveVMHandlers savevm_ram_handlers = {
 .load_setup = ram_load_setup,
 .load_cleanup = ram_load_cleanup,
 .resume_prepare = ram_resume_prepare,
+.is_active = is_ram_active,
 };
 
 static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
-- 
2.31.1




[PATCH v3 11/17] migration/qemu-file: Fix qemu_ftell() for non-writable file

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

qemu_ftell() will return a wrong value for a non-writable QEMUFile.
This happens due to the call to qemu_fflush() inside qemu_ftell();
that function won't flush if the file is opened for reading.

Signed-off-by: Nikita Lapshin 
---
 migration/qemu-file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 1479cddad9..53ccef80ac 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -663,7 +663,8 @@ int64_t qemu_ftell_fast(QEMUFile *f)
 int64_t qemu_ftell(QEMUFile *f)
 {
 qemu_fflush(f);
-return f->pos;
+/* Consider that qemu_fflush() won't work if file is non-writable */
+return f->pos + f->buf_index;
 }
 
 int qemu_file_rate_limit(QEMUFile *f)
-- 
2.31.1




[PATCH v3 12/17] migration/snapshot: Move RAM_SAVE_FLAG_xxx defines to migration/ram.h

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Move RAM_SAVE_FLAG_xxx defines from migration/ram.c to migration/ram.h

Signed-off-by: Nikita Lapshin 
---
 migration/ram.c | 16 
 migration/ram.h | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index ddc7abd08a..da7c7ec0e5 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -66,22 +66,6 @@
 /***/
 /* ram save/restore */
 
-/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
- * worked for pages that where filled with the same char.  We switched
- * it to only search for the zero value.  And to avoid confusion with
- * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it.
- */
-
-#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
-#define RAM_SAVE_FLAG_ZERO 0x02
-#define RAM_SAVE_FLAG_MEM_SIZE 0x04
-#define RAM_SAVE_FLAG_PAGE 0x08
-#define RAM_SAVE_FLAG_EOS  0x10
-#define RAM_SAVE_FLAG_CONTINUE 0x20
-#define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
-#define RAM_SAVE_FLAG_COMPRESS_PAGE0x100
-
 XBZRLECacheStats xbzrle_counters;
 
 /* struct contains XBZRLE cache and a static page
diff --git a/migration/ram.h b/migration/ram.h
index 2c6dc3675d..9dddfd381a 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -33,6 +33,22 @@
 #include "exec/cpu-common.h"
 #include "io/channel.h"
 
+/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
+ * worked for pages that where filled with the same char.  We switched
+ * it to only search for the zero value.  And to avoid confusion with
+ * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it.
+ */
+
+#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
+#define RAM_SAVE_FLAG_ZERO 0x02
+#define RAM_SAVE_FLAG_MEM_SIZE 0x04
+#define RAM_SAVE_FLAG_PAGE 0x08
+#define RAM_SAVE_FLAG_EOS  0x10
+#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
+/* 0x80 is reserved in migration.h start with 0x100 next */
+#define RAM_SAVE_FLAG_COMPRESS_PAGE0x100
+
 extern MigrationStats ram_counters;
 extern XBZRLECacheStats xbzrle_counters;
 extern CompressionStats compression_counters;
-- 
2.31.1




[PATCH v3 09/17] migration/snapshot: Introduce qemu-snapshot tool

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Execution environment, command-line argument parsing, usage/version info etc.

Signed-off-by: Nikita Lapshin 
---
 include/qemu-snapshot.h   |  65 ++
 migration/qemu-snapshot.c |  57 +
 qemu-snapshot.c   | 433 ++
 3 files changed, 555 insertions(+)
 create mode 100644 include/qemu-snapshot.h
 create mode 100644 migration/qemu-snapshot.c
 create mode 100644 qemu-snapshot.c

diff --git a/include/qemu-snapshot.h b/include/qemu-snapshot.h
new file mode 100644
index 00..8e548e7630
--- /dev/null
+++ b/include/qemu-snapshot.h
@@ -0,0 +1,65 @@
+/*
+ * QEMU External Snapshot Utility
+ *
+ * Copyright Virtuozzo GmbH, 2021
+ *
+ * Authors:
+ *  Andrey Gruzdev   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_SNAPSHOT_H
+#define QEMU_SNAPSHOT_H
+
+/* Invalid offset */
+#define INVALID_OFFSET  -1
+/* Maximum byte count for qemu_get_buffer_in_place() */
+#define INPLACE_READ_MAX(32768 - 4096)
+
+/* Backing cluster size */
+#define BDRV_CLUSTER_SIZE   (1024 * 1024)
+
+/* Minimum supported target page size */
+#define PAGE_SIZE_MIN   4096
+/*
+ * Maximum supported target page size. The limit is caused by using
+ * QEMUFile and qemu_get_buffer_in_place() on migration channel.
+ * IO_BUF_SIZE is currently 32KB.
+ */
+#define PAGE_SIZE_MAX   16384
+/* RAM slice size for snapshot saving */
+#define SLICE_SIZE  PAGE_SIZE_MAX
+/* RAM slice size for snapshot revert */
+#define SLICE_SIZE_REVERT   (16 * PAGE_SIZE_MAX)
+
+typedef struct StateInfo {
+int64_t page_size;
+int64_t page_mask;
+int page_bits;
+int64_t slice_size;
+int64_t slice_mask;
+int slice_bits;
+} StateInfo;
+
+typedef struct StateSaveCtx {
+BlockBackend *blk;  /* Block backend */
+
+StateInfo state_parameters; /* Migration state info*/
+} StateSaveCtx;
+
+typedef struct StateLoadCtx {
+BlockBackend *blk;  /* Block backend */
+
+StateInfo state_parameters; /* Migration state info*/
+} StateLoadCtx;
+
+void ram_init_state(void);
+void ram_destroy_state(void);
+StateSaveCtx *get_save_context(void);
+StateLoadCtx *get_load_context(void);
+int coroutine_fn save_state_main(StateSaveCtx *s);
+int coroutine_fn load_state_main(StateLoadCtx *s);
+
+#endif /* QEMU_SNAPSHOT_H */
diff --git a/migration/qemu-snapshot.c b/migration/qemu-snapshot.c
new file mode 100644
index 00..f7695e75c7
--- /dev/null
+++ b/migration/qemu-snapshot.c
@@ -0,0 +1,57 @@
+/*
+ * QEMU External Snapshot Utility
+ *
+ * Copyright Virtuozzo GmbH, 2021
+ *
+ * Authors:
+ *  Andrey Gruzdev   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
+#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
+#include "io/channel-buffer.h"
+#include "migration/qemu-file-channel.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/ram.h"
+#include "qemu-snapshot.h"
+
+/* RAM transfer context */
+typedef struct RAMCtx {
+int64_t normal_pages;   /* Total number of normal pages */
+} RAMCtx;
+
+static RAMCtx ram_ctx;
+
+int coroutine_fn save_state_main(StateSaveCtx *s)
+{
+/* TODO: implement */
+return 0;
+}
+
+int coroutine_fn load_state_main(StateLoadCtx *s)
+{
+/* TODO: implement */
+return 0;
+}
+
+/* Initialize snapshot RAM state */
+void ram_init_state(void)
+{
+RAMCtx *ram = &ram_ctx;
+
+memset(ram, 0, sizeof(ram_ctx));
+}
+
+/* Destroy snapshot RAM state */
+void ram_destroy_state(void)
+{
+/* TODO: implement */
+}
diff --git a/qemu-snapshot.c b/qemu-snapshot.c
new file mode 100644
index 00..683f1b265a
--- /dev/null
+++ b/qemu-snapshot.c
@@ -0,0 +1,433 @@
+/*
+ * QEMU External Snapshot Utility
+ *
+ * Copyright Virtuozzo GmbH, 2021
+ *
+ * Authors:
+ *  Andrey Gruzdev   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include 
+
+#include "qemu/memalign.h"
+#include "qemu-common.h"
+#include "qemu-version.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "qemu/coroutine.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qemu/log.h"
+#include "qemu/option_int.h"
+#include "qemu/main-loop.h"
+#include "trace/control.h"
+#include "io/channel-util.h"
+#include "io/channel-buffer.h"
+#include "migration/qemu-file-channel.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/misc.h"
+#include "qemu-snapshot.h"
+
+int64_t p

Re: [PATCH] tests/vm: do not specify -bios option

2022-06-16 Thread Daniel P . Berrangé
On Thu, Jun 16, 2022 at 12:14:01PM +0200, Paolo Bonzini wrote:
> On 6/16/22 10:44, Daniel P. Berrangé wrote:
> > On Thu, Jun 16, 2022 at 10:30:25AM +0200, Paolo Bonzini wrote:
> > > When running from the build tree, the executable is able to find
> > > the BIOS on its own; when running from the source tree, a firmware
> > > blob should already be installed and there is no guarantee that
> > > the one in the source tree works with the QEMU that is being used for
> > > the installation.
> > I think there is interaction with
> > 
> >
> > --firmwarepath=/usr/share/qemu-firmware:/usr/share/ipxe/qemu:/usr/share/seavgabios:/usr/share/seabios:/usr/share/sgabios
> > 
> > as the firmware path listed there will be searched before the local
> > build tree.
> > 
> > I agree with your desire to remove the -bios option, but I'm not
> > sure it is entirely safe unless logic in qemu_find_file is fixed
> > to ignore the global search path when running from the build
> > tree.
> 
> Isn't this the same for any other invocation of QEMU, for example in qtest?

Yes, quite likely in fact, so a pre-existing widespread problem and
thus not a reason to avoid this proposed change.

> I admit I didn't think of the interaction with --firmwarepath, but "if it
> hurts, don't do it" might apply here.  That is, install compatible firmware
> to the path _before_ trying to use a QEMU that specifies that path.

I'm mostly thinking of downstream distro developers where they have an
installed QEMU but are working on a build from a different QEMU version.
We can address this separately though

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH v3 13/17] migration/snapshot: Block layer support in qemu-snapshot

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This commit adds a few functions to simplify block layer work
for the qemu-snapshot tool.

Signed-off-by: Nikita Lapshin 
---
 include/qemu-snapshot.h  |   3 +
 migration/meson.build|   1 +
 migration/qemu-snapshot-io.c | 112 +++
 3 files changed, 116 insertions(+)
 create mode 100644 migration/qemu-snapshot-io.c

diff --git a/include/qemu-snapshot.h b/include/qemu-snapshot.h
index 8e548e7630..be2557f6a0 100644
--- a/include/qemu-snapshot.h
+++ b/include/qemu-snapshot.h
@@ -62,4 +62,7 @@ StateLoadCtx *get_load_context(void);
 int coroutine_fn save_state_main(StateSaveCtx *s);
 int coroutine_fn load_state_main(StateLoadCtx *s);
 
+QEMUFile *qemu_fopen_bdrv_vmstate(BlockDriverState *bs, int is_writable);
+void qemu_fsplice(QEMUFile *f_dst, QEMUFile *f_src, size_t size);
+size_t qemu_fsplice_tail(QEMUFile *f_dst, QEMUFile *f_src);
 #endif /* QEMU_SNAPSHOT_H */
diff --git a/migration/meson.build b/migration/meson.build
index 13498a6db3..3a04576c30 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -9,6 +9,7 @@ migration_files = files(
   'yank_functions.c',
   'migration.c',
   'qemu-snapshot.c',
+  'qemu-snapshot-io.c'
 )
 softmmu_ss.add(migration_files)
 
diff --git a/migration/qemu-snapshot-io.c b/migration/qemu-snapshot-io.c
new file mode 100644
index 00..904cb92c84
--- /dev/null
+++ b/migration/qemu-snapshot-io.c
@@ -0,0 +1,112 @@
+/*
+ * QEMU External Snapshot Utility
+ *
+ * Copyright Virtuozzo GmbH, 2021
+ *
+ * Authors:
+ *  Andrey Gruzdev   
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine.h"
+#include "sysemu/block-backend.h"
+#include "migration/qemu-file.h"
+#include "qemu-snapshot.h"
+
+static ssize_t bdrv_vmstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
+   size_t size, Error **errp)
+{
+return bdrv_load_vmstate((BlockDriverState *) opaque, buf, pos, size);
+}
+
+static ssize_t bdrv_vmstate_writev_buffer(void *opaque, struct iovec *iov,
+int iovcnt, int64_t pos, Error **errp)
+{
+QEMUIOVector qiov;
+int res;
+
+qemu_iovec_init_external(&qiov, iov, iovcnt);
+
+res = bdrv_writev_vmstate((BlockDriverState *) opaque, &qiov, pos);
+if (res < 0) {
+return res;
+}
+
+return qiov.size;
+}
+
+static int bdrv_vmstate_fclose(void *opaque, Error **errp)
+{
+return bdrv_flush((BlockDriverState *) opaque);
+}
+
+static const QEMUFileOps bdrv_vmstate_read_ops = {
+.get_buffer = bdrv_vmstate_get_buffer,
+.close  = bdrv_vmstate_fclose,
+};
+
+static const QEMUFileOps bdrv_vmstate_write_ops = {
+.writev_buffer  = bdrv_vmstate_writev_buffer,
+.close  = bdrv_vmstate_fclose,
+};
+
+/* Create QEMUFile to access vmstate stream on QCOW2 image */
+QEMUFile *qemu_fopen_bdrv_vmstate(BlockDriverState *bs, int is_writable)
+{
+if (is_writable) {
+return qemu_fopen_ops(bs, &bdrv_vmstate_write_ops, true);
+}
+
+return qemu_fopen_ops(bs, &bdrv_vmstate_read_ops, true);
+}
+
+/* Move number of bytes from the source QEMUFile to destination */
+void qemu_fsplice(QEMUFile *f_dst, QEMUFile *f_src, size_t size)
+{
+size_t rest = size;
+
+while (rest) {
+uint8_t *ptr = NULL;
+size_t req_size;
+size_t count;
+
+req_size = MIN(rest, INPLACE_READ_MAX);
+count = qemu_peek_buffer(f_src, &ptr, req_size, 0);
+qemu_file_skip(f_src, count);
+
+qemu_put_buffer(f_dst, ptr, count);
+rest -= count;
+}
+}
+
+/*
+ * Move data from source QEMUFile to destination
+ * until EOF is reached on source.
+ */
+size_t qemu_fsplice_tail(QEMUFile *f_dst, QEMUFile *f_src)
+{
+bool eof = false;
+size_t res = 0;
+
+while (!eof) {
+const size_t size = INPLACE_READ_MAX;
+uint8_t *buffer = NULL;
+size_t count;
+
+count = qemu_peek_buffer(f_src, &buffer, size, 0);
+qemu_file_skip(f_src, count);
+
+/* Reached EOF on source? */
+if (count != size) {
+eof = true;
+}
+
+qemu_put_buffer(f_dst, buffer, count);
+res += count;
+}
+
+return res;
+}
-- 
2.31.1




[PATCH v3 15/17] migration/snapshot: Save part implement

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Snapshot saving is done in two stages. First, the tool saves the vmstate part.
It must be guaranteed that the tool receives the vmstate part only, because the
tool does not parse this stream and simply saves it unmodified.

The second stage is RAM transfer. It must likewise be guaranteed that only the
RAM part is passed. RAM is saved into a qcow2 file.

The goal was to avoid duplicating parts of the existing migration code, so
this patch reuses the savevm handlers. The tool replaces the existing RAM
handler with its own and then calls the existing functions. This works
correctly because the tool's saving algorithm is similar to the loadvm
algorithm. Since this is not an obvious point, it is described here.

Signed-off-by: Nikita Lapshin 
---
 include/qemu-snapshot.h   |  16 +-
 migration/qemu-snapshot.c | 323 +-
 migration/savevm.c|  22 ++-
 migration/savevm.h|   4 +
 qemu-snapshot.c   |  82 +-
 5 files changed, 435 insertions(+), 12 deletions(-)

diff --git a/include/qemu-snapshot.h b/include/qemu-snapshot.h
index be2557f6a0..a97abd9f25 100644
--- a/include/qemu-snapshot.h
+++ b/include/qemu-snapshot.h
@@ -21,6 +21,7 @@
 /* Backing cluster size */
 #define BDRV_CLUSTER_SIZE   (1024 * 1024)
 
+#define VMSTATE_SIZE(1024 * 1024)
 /* Minimum supported target page size */
 #define PAGE_SIZE_MIN   4096
 /*
@@ -34,6 +35,8 @@
 /* RAM slice size for snapshot revert */
 #define SLICE_SIZE_REVERT   (16 * PAGE_SIZE_MAX)
 
+typedef struct QIOChannelBuffer QIOChannelBuffer;
+
 typedef struct StateInfo {
 int64_t page_size;
 int64_t page_mask;
@@ -44,9 +47,17 @@ typedef struct StateInfo {
 } StateInfo;
 
 typedef struct StateSaveCtx {
-BlockBackend *blk;  /* Block backend */
+BlockBackend *blk;  /* Block backend */
 
-StateInfo state_parameters; /* Migration state info*/
+QEMUFile *f_fd; /* QEMUFile for incoming stream */
+QEMUFile *f_vmstate;/* QEMUFile for vmstate backing */
+
+QIOChannelBuffer *ioc_vmstate;  /* Buffer for vmstate */
+QIOChannelBuffer *ioc_pages;/* Page coalescing buffer */
+
+StateInfo state_parameters; /* Migration state info*/
+
+size_t vmstate_len; /* vmstate len */
 } StateSaveCtx;
 
 typedef struct StateLoadCtx {
@@ -60,6 +71,7 @@ void ram_destroy_state(void);
 StateSaveCtx *get_save_context(void);
 StateLoadCtx *get_load_context(void);
 int coroutine_fn save_state_main(StateSaveCtx *s);
+void save_vmstate(StateSaveCtx *s);
 int coroutine_fn load_state_main(StateLoadCtx *s);
 
 QEMUFile *qemu_fopen_bdrv_vmstate(BlockDriverState *bs, int is_writable);
diff --git a/migration/qemu-snapshot.c b/migration/qemu-snapshot.c
index 394c6acb77..2c9909fc8e 100644
--- a/migration/qemu-snapshot.c
+++ b/migration/qemu-snapshot.c
@@ -22,6 +22,8 @@
 #include "migration/savevm.h"
 #include "migration/ram.h"
 #include "qemu-snapshot.h"
+#include "migration/savevm.h"
+#include "migration/register.h"
 
 /* RAM block */
 /* TODO RAMBlock should be replace with existing struct RAMBlock in ram.c */
@@ -40,6 +42,11 @@ typedef struct RAMBlock {
 char idstr[256];/* RAM block id string */
 } RAMBlock;
 
+typedef struct RAMPage {
+RAMBlock *block;/* RAM block containing the page */
+int64_t offset; /* Page offset in RAM block */
+} RAMPage;
+
 /* RAM transfer context */
 typedef struct RAMCtx {
 int64_t normal_pages;   /* Total number of normal pages */
@@ -51,6 +58,26 @@ typedef struct RAMCtx {
 
 static RAMCtx ram_ctx;
 
+static int64_t page_size;
+static int page_bits;
+static int64_t page_mask;
+static int64_t slice_size;
+static int slice_bits;
+static int64_t slice_mask;
+/*
+ * Init sufficient global variables
+ * TODO: These variables should be removed or add to existing global structures
+ */
+static void init_global_var(StateInfo *si)
+{
+page_size = si->page_size;
+page_bits = si->page_bits;
+page_mask = si->page_mask;
+slice_size = si->slice_size;
+slice_bits = si->slice_bits;
+slice_mask = si->slice_mask;
+}
+
 static inline
 bool ram_offset_in_block(RAMBlock *block, int64_t offset)
 {
@@ -158,6 +185,19 @@ static void ram_block_add(const char *idstr, int64_t size)
 QSIMPLEQ_INSERT_TAIL(&ram_ctx.ram_block_list, block, next);
 }
 
+static void ram_block_list_init_bitmaps(void)
+{
+RAMBlock *block;
+
+QSIMPLEQ_FOREACH(block, &ram_ctx.ram_block_list, next) {
+block->nr_pages = block->length >> page_bits;
+block->nr_slices = ROUND_UP(block->length, slice_size) >> slice_bits;
+
+block->bitmap = bitmap_new(block->nr_slices);
+bitmap_set(block->bitmap, 0, block->nr_slices);
+}
+}
+
 /*
  * Assume that QEMUFile is migration stream and try to get
  * from f_src ram blocks list. mem_size is a total amount of bytes of whole
@@ -188,6 +228,9 @@ static int ram_block_list_from_stream(

Re: [PATCH v2 1/2] hw/nvme: Implement shadow doorbell buffer support

2022-06-16 Thread Klaus Jensen
On Jun 15 22:49, Jinhao Fan wrote:
> Implement Doorbel Buffer Config command (Section 5.7 in NVMe Spec 1.3)
> and Shadow Doorbel buffer & EventIdx buffer handling logic (Section 7.13
> in NVMe Spec 1.3). For queues created before the Doorbell Buffer Config
> command, the nvme_dbbuf_config function tries to associate each existing
> SQ and CQ with its Shadow Doorbel buffer and EventIdx buffer address.
> Queues created after the Doorbell Buffer Config command will have the
> doorbell buffers associated with them when they are initialized.
> 
> In nvme_process_sq and nvme_post_cqe, proactively check for Shadow
> Doorbell buffer changes instead of wait for doorbell register changes.
> This reduces the number of MMIOs.
> 
> In nvme_process_db(), update the shadow doorbell buffer value with
> the doorbell register value if it is the admin queue. This is a hack
> since hosts like Linux NVMe driver and SPDK do not use shadow
> doorbell buffer for the admin queue. Copying the doorbell register
> value to the shadow doorbell buffer allows us to support these hosts
> as well as spec-compliant hosts that use shadow doorbell buffer for
> the admin queue.
> 
> Signed-off-by: Jinhao Fan 
> ---
>  hw/nvme/ctrl.c   | 112 ++-
>  hw/nvme/nvme.h   |   8 
>  include/block/nvme.h |   2 +
>  3 files changed, 121 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 03760ddeae..7be2e43f52 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -223,6 +223,7 @@ static const uint32_t nvme_cse_acs[256] = {
>  [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
>  [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
>  [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
> +[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
>  [NVME_ADM_CMD_FORMAT_NVM]   = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
>  };
>  
> @@ -1304,6 +1305,12 @@ static inline void nvme_blk_write(BlockBackend *blk, 
> int64_t offset,
>  }
>  }
>  
> +static void nvme_update_cq_head(NvmeCQueue *cq)
> +{
> +pci_dma_read(&cq->ctrl->parent_obj, cq->db_addr, &cq->head,
> +sizeof(cq->head));
> +}
> +
>  static void nvme_post_cqes(void *opaque)
>  {
>  NvmeCQueue *cq = opaque;
> @@ -1316,6 +1323,10 @@ static void nvme_post_cqes(void *opaque)
>  NvmeSQueue *sq;
>  hwaddr addr;
>  
> +if (cq->cqid && n->dbbuf_enabled) {
> +nvme_update_cq_head(cq);
> +}
> +

This wont work for drivers that *do* rely on updating the buffer for
admin queues, so we should read it regardless of the value of the queue
id (since we are now updating it through the "Keith Hack^TM").

>  if (nvme_cq_full(cq)) {
>  break;
>  }
> @@ -4237,6 +4248,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest 
> *req)
>  static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
>   uint16_t sqid, uint16_t cqid, uint16_t size)
>  {
> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);

You need to load the little endian value with ldq_le_p(&n->bar.cap).
Sorry, didn't catch this in v1.

>  int i;
>  NvmeCQueue *cq;
>  
> @@ -4256,6 +4268,11 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, 
> uint64_t dma_addr,
>  }
>  sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
>  
> +if (n->dbbuf_enabled) {
> +sq->db_addr = n->dbbuf_dbs + 2 * sqid * stride;
> +sq->ei_addr = n->dbbuf_eis + 2 * sqid * stride;
> +}
> +
>  assert(n->cq[cqid]);
>  cq = n->cq[cqid];
>  QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
> @@ -4599,6 +4616,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
> uint64_t dma_addr,
>   uint16_t cqid, uint16_t vector, uint16_t size,
>   uint16_t irq_enabled)
>  {
> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);

Same as above.

>  int ret;
>  
>  if (msix_enabled(&n->parent_obj)) {
> @@ -4615,6 +4633,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
> uint64_t dma_addr,
>  cq->head = cq->tail = 0;
>  QTAILQ_INIT(&cq->req_list);
>  QTAILQ_INIT(&cq->sq_list);
> +if (n->dbbuf_enabled) {
> +cq->db_addr = n->dbbuf_dbs + (2 * cqid + 1) * stride;
> +cq->ei_addr = n->dbbuf_eis + (2 * cqid + 1) * stride;
> +}
>  n->cq[cqid] = cq;
>  cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
>  }
> @@ -5767,6 +5789,47 @@ out:
>  return status;
>  }
>  
> +static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
> +{
> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
> +uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
> +uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
> +int i;
> +
> +/* Address should be page aligned */
> +if (dbs_addr & (n->page_size - 1) || eis_addr & (n->page_size

Re: [PATCH] tests/vm: do not specify -bios option

2022-06-16 Thread Daniel P . Berrangé
On Thu, Jun 16, 2022 at 10:30:25AM +0200, Paolo Bonzini wrote:
> When running from the build tree, the executable is able to find
> the BIOS on its own; when running from the source tree, a firmware
> blob should already be installed and there is no guarantee that
> the one in the source tree works with the QEMU that is being used for
> the installation.
> 
> Just remove the -bios option, since it is unnecessary and in fact
> there are other x86 VM tests that do not bother specifying it.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  tests/vm/fedora  | 1 -
>  tests/vm/freebsd | 1 -
>  tests/vm/netbsd  | 1 -
>  tests/vm/openbsd | 1 -
>  4 files changed, 4 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 5/7] 9pfs: fix 'Twalk' to only send error if no component walked

2022-06-16 Thread Christian Schoenebeck
Current implementation of 'Twalk' request handling always sends an 'Rerror'
response if any error occurred. The 9p2000 protocol spec says though:

  "
  If the first element cannot be walked for any reason, Rerror is returned.
  Otherwise, the walk will return an Rwalk message containing nwqid qids
  corresponding, in order, to the files that are visited by the nwqid
  successful elementwise walks; nwqid is therefore either nwname or the index
  of the first elementwise walk that failed.
  "

  http://ericvh.github.io/9p-rfc/rfc9p2000.html#anchor33

For that reason we are no longer leaving from an error path in function
v9fs_walk(), unless really no path component could be walked successfully or
if the request has been interrupted.

Local variable 'nwalked' counts and reflects the number of path components
successfully processed by background I/O thread, whereas local variable
'name_idx' subsequently counts and reflects the number of path components
eventually accepted successfully by 9p server controller portion.

New local variable 'any_err' is an aggregate variable reflecting whether any
error occurred at all, while already existing variable 'err' only reflects
the last error.

Despite QIDs being delivered to client in a more relaxed way now, it is
important to note though that fid still must remain unaffected if any error
occurred.

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
Message-Id: 

---
 hw/9pfs/9p.c | 49 +
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index f29611e9ed..aebadeaa03 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1768,7 +1768,7 @@ static void coroutine_fn v9fs_walk(void *opaque)
 {
 int name_idx, nwalked;
 g_autofree V9fsQID *qids = NULL;
-int i, err = 0;
+int i, err = 0, any_err = 0;
 V9fsPath dpath, path;
 P9ARRAY_REF(V9fsPath) pathes = NULL;
 uint16_t nwnames;
@@ -1834,19 +1834,20 @@ static void coroutine_fn v9fs_walk(void *opaque)
  * driver code altogether inside the following block.
  */
 v9fs_co_run_in_worker({
+nwalked = 0;
 if (v9fs_request_cancelled(pdu)) {
-err = -EINTR;
+any_err |= err = -EINTR;
 break;
 }
 err = s->ops->lstat(&s->ctx, &dpath, &fidst);
 if (err < 0) {
-err = -errno;
+any_err |= err = -errno;
 break;
 }
 stbuf = fidst;
-for (nwalked = 0; nwalked < nwnames; nwalked++) {
+for (; nwalked < nwnames; nwalked++) {
 if (v9fs_request_cancelled(pdu)) {
-err = -EINTR;
+any_err |= err = -EINTR;
 break;
 }
 if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
@@ -1856,16 +1857,16 @@ static void coroutine_fn v9fs_walk(void *opaque)
wnames[nwalked].data,
&pathes[nwalked]);
 if (err < 0) {
-err = -errno;
+any_err |= err = -errno;
 break;
 }
 if (v9fs_request_cancelled(pdu)) {
-err = -EINTR;
+any_err |= err = -EINTR;
 break;
 }
 err = s->ops->lstat(&s->ctx, &pathes[nwalked], &stbuf);
 if (err < 0) {
-err = -errno;
+any_err |= err = -errno;
 break;
 }
 stbufs[nwalked] = stbuf;
@@ -1875,13 +1876,19 @@ static void coroutine_fn v9fs_walk(void *opaque)
 });
 /*
  * Handle all the rest of this Twalk request on main thread ...
+ *
+ * NOTE: -EINTR is an exception where we deviate from the protocol spec
+ * and simply send a (R)Lerror response instead of bothering to assemble
+ * a (deducted) Rwalk response; because -EINTR is always the result of a
+ * Tflush request, so client would no longer wait for a response in this
+ * case anyway.
  */
-if (err < 0) {
+if ((err < 0 && !nwalked) || err == -EINTR) {
 goto out;
 }
 
-err = stat_to_qid(pdu, &fidst, &qid);
-if (err < 0) {
+any_err |= err = stat_to_qid(pdu, &fidst, &qid);
+if (err < 0 && !nwalked) {
 goto out;
 }
 stbuf = fidst;
@@ -1890,20 +1897,29 @@ static void coroutine_fn v9fs_walk(void *opaque)
 v9fs_path_copy(&dpath, &fidp->path);
 v9fs_path_copy(&path, &fidp->path);
 
-for (name_idx = 0; name_idx < nwnames; name_idx++) {
+for (name_idx = 0; name_idx < nwalked; name_idx++) {
 if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
 strcmp("..", wnames[name_idx].data))
 {
 stbuf = stbufs[name_idx];
-err = stat_to_qid(pdu, &stbuf, &qid);
+any_err |= err = stat_to_qid(pdu, &stbuf, &qid);
 if (e

[PATCH v3 16/17] migration/snapshot: Precopy load implemented

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

Load the snapshot from a qcow2 file. This part also works only with the RAM
part and sends the vmstate part as it was saved previously, without parsing.

Here migration code was reused as well, but now it is the savevm part. The
tool replaces the RAM handlers as it did before in the save part, but now it
is also necessary to avoid using the other handlers. That is where
stream-content-list is used: it helps to "disable" all handlers except RAM.

Slices were also used here to increase the efficiency of reading from disk:
when the tool needs to read a page from a block, it reads a number of pages
and sends them all.

Signed-off-by: Nikita Lapshin 
---
 include/qemu-snapshot.h   |   4 +-
 migration/qemu-snapshot.c | 333 +-
 qemu-snapshot.c   |  25 ++-
 3 files changed, 356 insertions(+), 6 deletions(-)

diff --git a/include/qemu-snapshot.h b/include/qemu-snapshot.h
index a97abd9f25..74885c03bb 100644
--- a/include/qemu-snapshot.h
+++ b/include/qemu-snapshot.h
@@ -62,8 +62,10 @@ typedef struct StateSaveCtx {
 
 typedef struct StateLoadCtx {
 BlockBackend *blk;  /* Block backend */
+QEMUFile *f_fd;
+QEMUFile *f_vmstate;
 
-StateInfo state_parameters; /* Migration state info*/
+StateInfo state_parameters;
 } StateLoadCtx;
 
 void ram_init_state(void);
diff --git a/migration/qemu-snapshot.c b/migration/qemu-snapshot.c
index 2c9909fc8e..280f5be25c 100644
--- a/migration/qemu-snapshot.c
+++ b/migration/qemu-snapshot.c
@@ -21,9 +21,11 @@
 #include "migration/qemu-file.h"
 #include "migration/savevm.h"
 #include "migration/ram.h"
+#include "migration/migration.h"
 #include "qemu-snapshot.h"
 #include "migration/savevm.h"
 #include "migration/register.h"
+#include "qapi/qapi-types-migration.h"
 
 /* RAM block */
 /* TODO RAMBlock should be replace with existing struct RAMBlock in ram.c */
@@ -53,7 +55,8 @@ typedef struct RAMCtx {
 
 /* RAM block list head */
 QSIMPLEQ_HEAD(, RAMBlock) ram_block_list;
-
+RAMPage last_page;
+RAMBlock *last_sent_block;
 } RAMCtx;
 
 static RAMCtx ram_ctx;
@@ -523,12 +526,336 @@ void save_vmstate(StateSaveCtx *s)
 qemu_put_be64(s->f_vmstate, res);
 }
 
-int coroutine_fn load_state_main(StateLoadCtx *s)
+static void load_state_check_errors(StateLoadCtx *s, int *res)
+{
+/*
+ * Check for file errors on success. Replace generic -EINVAL
+ * retcode with file error if possible.
+ */
+if (*res >= 0 || *res == -EINVAL) {
+int f_res = qemu_file_get_error(s->f_fd);
+
+if (!f_res) {
+f_res = qemu_file_get_error(s->f_vmstate);
+}
+if (f_res) {
+*res = f_res;
+}
+}
+}
+
+static int send_conf(StateLoadCtx *s)
+{
+QEMUFile *f = s->f_vmstate;
+uint32_t id_len;
+uint8_t buf[256];
+
+qemu_put_byte(s->f_fd, QEMU_VM_CONFIGURATION);
+id_len = qemu_get_be32(f);
+qemu_put_be32(s->f_fd, id_len);
+
+if (id_len > 255) {
+error_report("Corrupted configuration section");
+return -EINVAL;
+}
+
+qemu_get_buffer(f, buf, id_len);
+qemu_put_buffer(s->f_fd, buf, id_len);
+buf[id_len] = '\0';
+
+return 0;
+}
+
+static int send_header(StateLoadCtx *s)
+{
+QEMUFile *f = s->f_vmstate;
+uint32_t v;
+
+/* Validate magic */
+v = qemu_get_be32(f);
+if (v != QEMU_VM_FILE_MAGIC) {
+error_report("Not a valid snapshot");
+ return -EINVAL;
+}
+qemu_put_be32(s->f_fd, v);
+
+v = qemu_get_be32(f);
+if (v == QEMU_VM_FILE_VERSION_COMPAT) {
+error_report("SaveVM v2 format is obsolete");
+return -EINVAL;
+}
+qemu_put_be32(s->f_fd, v);
+
+return 0;
+}
+
+static int load_state_ramlist(StateLoadCtx *s)
 {
-/* TODO: implement */
+uint64_t size = qemu_get_be64(s->f_vmstate);
+size = size & (~RAM_SAVE_FLAG_MEM_SIZE);
+return ram_block_list_from_stream(s->f_vmstate, size);
+}
+
+static int send_setup(StateLoadCtx *s)
+{
+/* We need to enable only ram using parameter stream-content-list */
+qemu_savevm_state_setup(s->f_fd);
+
+ram_block_list_init_bitmaps();
+
 return 0;
 }
 
+/* No need to parse vmstate part, we just send it using known offset */
+static int send_vmstate(QEMUFile *f_vmstate, QEMUFile *f_dest)
+{
+/* Send vmstate without last byte because it is QEMU_VM_EOF */
+size_t len;
+len = qemu_get_be64(f_vmstate);
+qemu_fsplice(f_dest, f_vmstate, len);
+
+return 0;
+}
+
+static int ram_send_setup(QEMUFile *f, void *opaque)
+{
+int res = ram_block_list_to_stream(f);
+
+qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+qemu_fflush(f);
+
+return res;
+}
+
+static bool find_next_page(RAMPage *page)
+{
+RAMCtx *ram = &ram_ctx;
+RAMBlock *block = ram->last_page.block;
+int64_t slice = ram->last_page.offset >> slice_bits;
+bool full_round = false;
+bool found = false;
+
+if (!block) {
+restart:
+block = QSIMPLEQ_FIRST(&ram->ram_block_list);
+slice = 0;
+ 

Re: [PATCH 1/8] migration: Implemented new parameter stream_content

2022-06-16 Thread Daniel P . Berrangé
On Thu, Jun 16, 2022 at 01:19:57PM +0300, nikita.laps...@openvz.org wrote:
> From: Nikita Lapshin 
> 
> This new optional parameter contains information about migration
> stream parts to be sent (such as RAM, block, bitmap). This looks
> better than using capabilities to solve the problem of dividing
> the migration stream.
> 
> Signed-off-by: Nikita Lapshin 
> ---
>  migration/migration.c | 47 ++-
>  migration/migration.h |  2 ++
>  qapi/migration.json   | 21 ---
>  3 files changed, 66 insertions(+), 4 deletions(-)
> 

> diff --git a/qapi/migration.json b/qapi/migration.json
> index 18e2610e88..80acf6dbc3 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -760,6 +760,12 @@
>  #block device name if there is one, and to their 
> node name
>  #otherwise. (Since 5.2)
>  #
> +# @stream-content-list: Parameter control content of migration stream such 
> as RAM,
> +#   vmstate, block and dirty-bitmaps. This is optional 
> parameter
> +#   so migration will work correctly without it.
> +#   This parameter takes string list as description of 
> content
> +#   and include that part of migration stream. (Since 
> 7.0)
> +#
>  # Features:
>  # @unstable: Member @x-checkpoint-delay is experimental.
>  #
> @@ -780,7 +786,8 @@
> 'xbzrle-cache-size', 'max-postcopy-bandwidth',
> 'max-cpu-throttle', 'multifd-compression',
> 'multifd-zlib-level' ,'multifd-zstd-level',
> -   'block-bitmap-mapping' ] }
> +   'block-bitmap-mapping',
> +   'stream-content-list' ] }
>  
>  ##
>  # @MigrateSetParameters:
> @@ -925,6 +932,12 @@
>  #block device name if there is one, and to their 
> node name
>  #otherwise. (Since 5.2)
>  #
> +# @stream-content-list: Parameter control content of migration stream such 
> as RAM,
> +#   vmstate, block and dirty-bitmaps. This is optional 
> parameter
> +#   so migration will work correctly without it.
> +#   This parameter takes string list as description of 
> content
> +#   and include that part of migration stream. (Since 
> 7.0)
> +#
>  # Features:
>  # @unstable: Member @x-checkpoint-delay is experimental.
>  #
> @@ -960,7 +973,8 @@
>  '*multifd-compression': 'MultiFDCompression',
>  '*multifd-zlib-level': 'uint8',
>  '*multifd-zstd-level': 'uint8',
> -'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
> +'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
> +'*stream-content-list': [ 'str' ] } }
>  
>  ##
>  # @migrate-set-parameters:
> @@ -1158,7 +1172,8 @@
>  '*multifd-compression': 'MultiFDCompression',
>  '*multifd-zlib-level': 'uint8',
>  '*multifd-zstd-level': 'uint8',
> -'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
> +'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
> +'*stream-content-list': [ 'str' ] } }

These will need to be represented using an enum type rather than
a string, since this value accepts a fixed pre-determined list of
strings.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH v3 17/17] migration/snapshot: Postcopy load implemented

2022-06-16 Thread nikita . lapshin
From: Nikita Lapshin 

This is a modified version of the load part from the previous patch.

Implemented new rp listen thread for snapshot-tool. Also implemented
functions for starting postcopy.

This mode can be turned on by specifying --postcopy flag.

Signed-off-by: Nikita Lapshin 
---
 include/qemu-snapshot.h   |  12 ++
 migration/migration.c | 123 +++
 migration/migration.h |   1 +
 migration/qemu-snapshot.c | 249 +-
 migration/savevm.c|  25 
 migration/savevm.h|   4 +
 qemu-snapshot.c   |  10 ++
 7 files changed, 422 insertions(+), 2 deletions(-)

diff --git a/include/qemu-snapshot.h b/include/qemu-snapshot.h
index 74885c03bb..b0f235747f 100644
--- a/include/qemu-snapshot.h
+++ b/include/qemu-snapshot.h
@@ -65,6 +65,15 @@ typedef struct StateLoadCtx {
 QEMUFile *f_fd;
 QEMUFile *f_vmstate;
 
+/* Postcopy part */
+bool postcopy;
+bool in_postcopy;
+
+/* Return path part */
+QemuThread rp_listen_thread;
+QEMUFile *f_rp_fd;
+bool has_rp_listen_thread;
+
 StateInfo state_parameters;
 } StateLoadCtx;
 
@@ -76,6 +85,9 @@ int coroutine_fn save_state_main(StateSaveCtx *s);
 void save_vmstate(StateSaveCtx *s);
 int coroutine_fn load_state_main(StateLoadCtx *s);
 
+int queue_page_request(const char *idstr, uint64_t offset,
+   uint32_t size);
+
 QEMUFile *qemu_fopen_bdrv_vmstate(BlockDriverState *bs, int is_writable);
 void qemu_fsplice(QEMUFile *f_dst, QEMUFile *f_src, size_t size);
 size_t qemu_fsplice_tail(QEMUFile *f_dst, QEMUFile *f_src);
diff --git a/migration/migration.c b/migration/migration.c
index 6528b3ad41..6f82e8ea48 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "qemu-snapshot.h"
 
 #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed throttling 
*/
 
@@ -3517,6 +3518,128 @@ static MigThrError postcopy_pause(MigrationState *s)
 }
 }
 
+/*
+ * Return-path message processing thread for qemu-snapshot tool
+ */
+void *qemu_snapshot_rp_listen_thread(void *opaque)
+{
+QEMUFile *f = (QEMUFile *) opaque;
+int res = 0;
+uint64_t pages = 0;
+
+while (!res) {
+uint8_t h_buf[512];
+const int h_max_len = sizeof(h_buf);
+int h_type;
+int h_len;
+size_t count;
+
+h_type = qemu_get_be16(f);
+h_len = qemu_get_be16(f);
+
+/* Make early check for input errors */
+res = qemu_file_get_error(f);
+if (res) {
+break;
+}
+
+/* Check message type */
+if (h_type >= MIG_RP_MSG_MAX || h_type == MIG_RP_MSG_INVALID) {
+error_report("RP: received invalid message type %d length %d",
+ h_type, h_len);
+res = -EINVAL;
+break;
+}
+
+/* Check message length */
+if (rp_cmd_args[h_type].len != -1 && h_len != rp_cmd_args[h_type].len) 
{
+error_report("RP: received %s message len %d expected %ld",
+ rp_cmd_args[h_type].name,
+ h_len, rp_cmd_args[h_type].len);
+res = -EINVAL;
+break;
+} else if (h_len > h_max_len) {
+error_report("RP: received %s message len %d max_len %d",
+ rp_cmd_args[h_type].name, h_len, h_max_len);
+res = -EINVAL;
+break;
+}
+
+count = qemu_get_buffer(f, h_buf, h_len);
+if (count != h_len) {
+break;
+}
+
+switch (h_type) {
+case MIG_RP_MSG_SHUT:
+{
+int shut_error;
+
+shut_error = be32_to_cpu(*(uint32_t *) h_buf);
+if (shut_error) {
+error_report("RP: sibling shutdown, error %d", shut_error);
+}
+
+/* Exit processing loop */
+res = 1;
+break;
+}
+
+case MIG_RP_MSG_REQ_PAGES:
+case MIG_RP_MSG_REQ_PAGES_ID:
+{
+pages++;
+uint64_t offset;
+uint32_t size;
+char *id_str = NULL;
+
+offset = be64_to_cpu(*(uint64_t *) (h_buf + 0));
+size = be32_to_cpu(*(uint32_t *) (h_buf + 8));
+
+if (h_type == MIG_RP_MSG_REQ_PAGES_ID) {
+int h_parsed_len = rp_cmd_args[MIG_RP_MSG_REQ_PAGES].len;
+
+if (h_len > h_parsed_len) {
+int id_len;
+
+/* RAM block id string */
+id_len = h_buf[h_parsed_len];
+id_str = (char *) &h_buf[h_parsed_len + 1];
+id_str[id_len] = 0;
+
+h_parsed_len += id_len + 1;
+}
+
+if (h_parsed_len != h_len) {
+error_report("RP: received %s message len %d expected %d",
+ rp_cmd_args[MIG

Re: [PATCH v4 0/7] 9pfs: fix 'Twalk' protocol violation

2022-06-16 Thread Christian Schoenebeck
On Dienstag, 15. März 2022 11:10:25 CEST Christian Schoenebeck wrote:
> Currently the implementation of 'Twalk' does not behave exactly as specified
> by the 9p2000 protocol specification. Actual fix is patch 5; see the
> description of that patch for details of what this overall fix and series
> is about.
> 
> PREREQUISITES
> =
> 
> This series requires the following additional patch to work correctly:
> https://lore.kernel.org/qemu-devel/e1ntpyu-yr...@lizzy.crudebyte.com/
> 
> OVERVIEW OF PATCHES
> ===
> 
> Patch 4 is a preparatory (pure) refactoring change to make actual 'Twalk'
> fix patch 5 better readable.
> 
> All the other patches are just additional test cases for guarding 'Twalk'
> behaviour.
> 
> v3 -> v4:
> 
>   * QID returned by Twalk request in fs_walk_2nd_nonexistent() test should
> NOT be identical to root node's QID. [patch 7]
> 
>   * Fix actual 'fid unaffected' check in fs_walk_2nd_nonexistent() test by
> sending a subsequent 'Tgetattr' request. [patch 7]
> 
> Christian Schoenebeck (7):
>   tests/9pfs: walk to non-existent dir
>   tests/9pfs: Twalk with nwname=0
>   tests/9pfs: compare QIDs in fs_walk_none() test
>   9pfs: refactor 'name_idx' -> 'nwalked' in v9fs_walk()
>   9pfs: fix 'Twalk' to only send error if no component walked
>   tests/9pfs: guard recent 'Twalk' behaviour fix
>   tests/9pfs: check fid being unaffected in fs_walk_2nd_nonexistent
> 
>  hw/9pfs/9p.c |  57 ++
>  tests/qtest/virtio-9p-test.c | 201 ++-
>  2 files changed, 231 insertions(+), 27 deletions(-)

Queued on 9p.next:
https://github.com/cschoenebeck/qemu/commits/9p.next

Good time to send a PR for this.

Thanks!

Best regards,
Christian Schoenebeck





[PULL 7/7] tests/9pfs: check fid being unaffected in fs_walk_2nd_nonexistent

2022-06-16 Thread Christian Schoenebeck
Extend previously added test case by checking that fid was unaffected
by 'Twalk' request (i.e. when 2nd path component of request being
invalid). Do that by subsequently sending a 'Tgetattr' request with
the fid previously used for 'Twalk'; that 'Tgetattr' request should
return an 'Rlerror' response by 9p server with error code ENOENT as
that fid is basically invalid.

And as we are at it, also check that the QID returned by 'Twalk' is
not identical to the root node's QID.

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
Message-Id: 
<6f0813cafdbf683cdac8b1492dd4ef8699c5b1d9.1647339025.git.qemu_...@crudebyte.com>
---
 tests/qtest/virtio-9p-test.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index c787ded4d2..25305a4cf7 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -721,14 +721,19 @@ static void fs_version(void *obj, void *data, 
QGuestAllocator *t_alloc)
 do_version(obj);
 }
 
-static void do_attach(QVirtio9P *v9p)
+static void do_attach_rqid(QVirtio9P *v9p, v9fs_qid *qid)
 {
 P9Req *req;
 
 do_version(v9p);
 req = v9fs_tattach(v9p, 0, getuid(), 0);
 v9fs_req_wait_for_reply(req, NULL);
-v9fs_rattach(req, NULL);
+v9fs_rattach(req, qid);
+}
+
+static void do_attach(QVirtio9P *v9p)
+{
+do_attach_rqid(v9p, NULL);
 }
 
 static void fs_attach(void *obj, void *data, QGuestAllocator *t_alloc)
@@ -1101,19 +1106,32 @@ static void fs_walk_2nd_nonexistent(void *obj, void 
*data,
 {
 QVirtio9P *v9p = obj;
 alloc = t_alloc;
+v9fs_qid root_qid;
 uint16_t nwqid;
+uint32_t fid, err;
+P9Req *req;
 g_autofree v9fs_qid *wqid = NULL;
 g_autofree char *path = g_strdup_printf(
 QTEST_V9FS_SYNTH_WALK_FILE "/non-existent", 0
 );
 
-do_attach(v9p);
-do_walk_rqids(v9p, path, &nwqid, &wqid);
+do_attach_rqid(v9p, &root_qid);
+fid = do_walk_rqids(v9p, path, &nwqid, &wqid);
 /*
  * The 9p2000 protocol spec says: "nwqid is therefore either nwname or the
  * index of the first elementwise walk that failed."
  */
 assert(nwqid == 1);
+
+/* returned QID wqid[0] is file ID of 1st subdir */
+g_assert(wqid && wqid[0] && !is_same_qid(root_qid, wqid[0]));
+
+/* expect fid being unaffected by walk above */
+req = v9fs_tgetattr(v9p, fid, P9_GETATTR_BASIC, 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rlerror(req, &err);
+
+g_assert_cmpint(err, ==, ENOENT);
 }
 
 static void fs_walk_none(void *obj, void *data, QGuestAllocator *t_alloc)
-- 
2.30.2




[PULL 0/7] 9p queue 2022-06-16

2022-06-16 Thread Christian Schoenebeck
The following changes since commit 9ac873a46963098441be920ef7a2eaf244a3352d:

  Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into 
staging (2022-06-15 09:47:24 -0700)

are available in the Git repository at:

  https://github.com/cschoenebeck/qemu.git tags/pull-9p-20220616

for you to fetch changes up to 0e43495d3b4a50fc5e22f7b71261fdd5b56fdfcb:

  tests/9pfs: check fid being unaffected in fs_walk_2nd_nonexistent (2022-06-16 
12:44:52 +0200)


9pfs: fix 'Twalk' protocol violation

Actual fix is patch 5, whereas patch 4 being preparatory, all other
patches are test cases to guard this Twalk issue.


Christian Schoenebeck (7):
  tests/9pfs: walk to non-existent dir
  tests/9pfs: Twalk with nwname=0
  tests/9pfs: compare QIDs in fs_walk_none() test
  9pfs: refactor 'name_idx' -> 'nwalked' in v9fs_walk()
  9pfs: fix 'Twalk' to only send error if no component walked
  tests/9pfs: guard recent 'Twalk' behaviour fix
  tests/9pfs: check fid being unaffected in fs_walk_2nd_nonexistent

 hw/9pfs/9p.c |  63 +-
 tests/qtest/virtio-9p-test.c | 201 ++-
 2 files changed, 237 insertions(+), 27 deletions(-)



[PULL 4/7] 9pfs: refactor 'name_idx' -> 'nwalked' in v9fs_walk()

2022-06-16 Thread Christian Schoenebeck
The local variable 'name_idx' is used in two loops in function v9fs_walk().
Let the first loop use its own variable 'nwalked' instead, which we will
use in subsequent patch as the number of (requested) path components
successfully walked by background I/O thread.

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
Message-Id: 

---
 hw/9pfs/9p.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 0cd0c14c2a..f29611e9ed 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1766,7 +1766,7 @@ static bool same_stat_id(const struct stat *a, const 
struct stat *b)
 
 static void coroutine_fn v9fs_walk(void *opaque)
 {
-int name_idx;
+int name_idx, nwalked;
 g_autofree V9fsQID *qids = NULL;
 int i, err = 0;
 V9fsPath dpath, path;
@@ -1844,17 +1844,17 @@ static void coroutine_fn v9fs_walk(void *opaque)
 break;
 }
 stbuf = fidst;
-for (name_idx = 0; name_idx < nwnames; name_idx++) {
+for (nwalked = 0; nwalked < nwnames; nwalked++) {
 if (v9fs_request_cancelled(pdu)) {
 err = -EINTR;
 break;
 }
 if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
-strcmp("..", wnames[name_idx].data))
+strcmp("..", wnames[nwalked].data))
 {
 err = s->ops->name_to_path(&s->ctx, &dpath,
-   wnames[name_idx].data,
-   &pathes[name_idx]);
+   wnames[nwalked].data,
+   &pathes[nwalked]);
 if (err < 0) {
 err = -errno;
 break;
@@ -1863,13 +1863,13 @@ static void coroutine_fn v9fs_walk(void *opaque)
 err = -EINTR;
 break;
 }
-err = s->ops->lstat(&s->ctx, &pathes[name_idx], &stbuf);
+err = s->ops->lstat(&s->ctx, &pathes[nwalked], &stbuf);
 if (err < 0) {
 err = -errno;
 break;
 }
-stbufs[name_idx] = stbuf;
-v9fs_path_copy(&dpath, &pathes[name_idx]);
+stbufs[nwalked] = stbuf;
+v9fs_path_copy(&dpath, &pathes[nwalked]);
 }
 }
 });
-- 
2.30.2




Re: [PATCH 0/4] hw/nvme: add support for TP4084

2022-06-16 Thread Klaus Jensen
On Jun  8 03:28, Niklas Cassel via wrote:
> Hello there,
> 
> considering that Linux v5.19-rc1 is out which includes support for
> NVMe TP4084:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/nvme/host/core.c?id=354201c53e61e493017b15327294b0c8ab522d69
> 
> I thought that it might be nice to have QEMU support for the same.
> 
> TP4084 adds a new mode, CC.CRIME, that can be used to mark a namespace
> as ready independently from the controller.
> 
> When CC.CRIME is 0 (default), things behave as before, all namespaces
> are ready when CSTS.RDY gets set to 1.
> 
> Add a new "ready_delay" namespace device parameter, in order to emulate
> different ready latencies for namespaces when CC.CRIME is 1.
> 
> The patch series also adds a "crwmt" controller parameter, in order to
> be able to expose the worst case timeout that the host should wait for
> all namespaces to become ready.
> 
> 
> Example qemu cmd line for the new options:
> 
> # delay in s (20s)
> NS1_DELAY_S=20
> # convert to units of 500ms
> NS1_DELAY=$((NS1_DELAY_S*2))
> 
> # delay in s (60s)
> NS2_DELAY_S=60
> # convert to units of 500ms
> NS2_DELAY=$((NS2_DELAY_S*2))
> 
> # timeout in s (120s)
> CRWMT_S=120
> # convert to units of 500ms
> CRWMT=$((CRWMT_S*2))
> 
>  -device nvme,serial=deadbeef,crwmt=$CRWMT \
>  -drive file=$NS1_DATA,id=nvm-1,format=raw,if=none \
>  -device nvme-ns,drive=nvm-1,ready_delay=$NS1_DELAY \
>  -drive file=$NS2_DATA,id=nvm-2,format=raw,if=none \
>  -device nvme-ns,drive=nvm-2,ready_delay=$NS2_DELAY \
> 
> 
> Niklas Cassel (4):
>   hw/nvme: claim NVMe 2.0 compliance
>   hw/nvme: store a pointer to the NvmeSubsystem in the NvmeNamespace
>   hw/nvme: add support for ratified TP4084
>   hw/nvme: add new never_ready parameter to test the DNR bit
> 
>  hw/nvme/ctrl.c   | 151 +--
>  hw/nvme/ns.c |  17 +
>  hw/nvme/nvme.h   |   9 +++
>  hw/nvme/trace-events |   1 +
>  include/block/nvme.h |  60 -
>  5 files changed, 233 insertions(+), 5 deletions(-)
> 
> -- 
> 2.36.1
> 
> 

Hi Niklas,

I've been going back and forth on my position on this.

I'm not straight up against it, but this only seems useful as a one-off
patch to test the kernel support for this. Considering the limitations
you state and the limited use case, I fear this is a little bloaty to
carry upstream.

But I totally acknowledge that this is a horrible complicated behavior
to implement on the driver side, so I guess we might all benefit from
this.

Keith, do you have an opinion on this?


signature.asc
Description: PGP signature


[PULL 1/7] tests/9pfs: walk to non-existent dir

2022-06-16 Thread Christian Schoenebeck
Expect ENOENT Rlerror response when trying to walk to a
non-existent directory.

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
Based-on: 
Message-Id: 
<1f5aa50ace3ba3861ea31e367518282065a6.1647339025.git.qemu_...@crudebyte.com>
---
 tests/qtest/virtio-9p-test.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index e28c71bd8f..b3837546be 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -606,6 +606,25 @@ static uint32_t do_walk(QVirtio9P *v9p, const char *path)
 return fid;
 }
 
+/* utility function: walk to requested dir and expect passed error response */
+static void do_walk_expect_error(QVirtio9P *v9p, const char *path, uint32_t 
err)
+{
+char **wnames;
+P9Req *req;
+uint32_t _err;
+const uint32_t fid = genfid();
+
+int nwnames = split(path, "/", &wnames);
+
+req = v9fs_twalk(v9p, 0, fid, nwnames, wnames, 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rlerror(req, &_err);
+
+g_assert_cmpint(_err, ==, err);
+
+split_free(&wnames);
+}
+
 static void fs_version(void *obj, void *data, QGuestAllocator *t_alloc)
 {
 alloc = t_alloc;
@@ -974,6 +993,15 @@ static void fs_walk_no_slash(void *obj, void *data, 
QGuestAllocator *t_alloc)
 g_free(wnames[0]);
 }
 
+static void fs_walk_nonexistent(void *obj, void *data, QGuestAllocator 
*t_alloc)
+{
+QVirtio9P *v9p = obj;
+alloc = t_alloc;
+
+do_attach(v9p);
+do_walk_expect_error(v9p, "non-existent", ENOENT);
+}
+
 static void fs_walk_dotdot(void *obj, void *data, QGuestAllocator *t_alloc)
 {
 QVirtio9P *v9p = obj;
@@ -1409,6 +1437,8 @@ static void register_virtio_9p_test(void)
   &opts);
 qos_add_test("synth/walk/dotdot_from_root", "virtio-9p",
  fs_walk_dotdot,  &opts);
+qos_add_test("synth/walk/non_existent", "virtio-9p", fs_walk_nonexistent,
+  &opts);
 qos_add_test("synth/lopen/basic", "virtio-9p", fs_lopen,  &opts);
 qos_add_test("synth/write/basic", "virtio-9p", fs_write,  &opts);
 qos_add_test("synth/flush/success", "virtio-9p", fs_flush_success,
-- 
2.30.2




[PULL 6/7] tests/9pfs: guard recent 'Twalk' behaviour fix

2022-06-16 Thread Christian Schoenebeck
Previous 9p patch fixed 'Twalk' request handling, which was previously not
behaving as specified by the 9p2000 protocol spec. This patch adds a new test
case which guards the new 'Twalk' behaviour in question.

More specifically: it sends a 'Twalk' request where the 1st path component
is valid, whereas the 2nd path component transmitted to server does not
exist. The expected behaviour is that 9p server would respond by sending
a 'Rwalk' response with exactly 1 QID (instead of 'Rlerror' response).

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
Message-Id: 
<61bde2f44b87e24b70ec098dfb81765665b2dfcb.1647339025.git.qemu_...@crudebyte.com>
---
 tests/qtest/virtio-9p-test.c | 42 +---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index 3c0f094929..c787ded4d2 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -669,8 +669,12 @@ static void do_version(QVirtio9P *v9p)
 g_assert_cmpmem(server_version, server_len, version, strlen(version));
 }
 
-/* utility function: walk to requested dir and return fid for that dir */
-static uint32_t do_walk(QVirtio9P *v9p, const char *path)
+/*
+ * utility function: walk to requested dir and return fid for that dir and
+ * the QIDs of server response
+ */
+static uint32_t do_walk_rqids(QVirtio9P *v9p, const char *path, uint16_t 
*nwqid,
+  v9fs_qid **wqid)
 {
 char **wnames;
 P9Req *req;
@@ -680,12 +684,18 @@ static uint32_t do_walk(QVirtio9P *v9p, const char *path)
 
 req = v9fs_twalk(v9p, 0, fid, nwnames, wnames, 0);
 v9fs_req_wait_for_reply(req, NULL);
-v9fs_rwalk(req, NULL, NULL);
+v9fs_rwalk(req, nwqid, wqid);
 
 split_free(&wnames);
 return fid;
 }
 
+/* utility function: walk to requested dir and return fid for that dir */
+static uint32_t do_walk(QVirtio9P *v9p, const char *path)
+{
+return do_walk_rqids(v9p, path, NULL, NULL);
+}
+
 /* utility function: walk to requested dir and expect passed error response */
 static void do_walk_expect_error(QVirtio9P *v9p, const char *path, uint32_t 
err)
 {
@@ -1079,9 +1089,33 @@ static void fs_walk_nonexistent(void *obj, void *data, 
QGuestAllocator *t_alloc)
 alloc = t_alloc;
 
 do_attach(v9p);
+/*
+ * The 9p2000 protocol spec says: "If the first element cannot be walked
+ * for any reason, Rerror is returned."
+ */
 do_walk_expect_error(v9p, "non-existent", ENOENT);
 }
 
+static void fs_walk_2nd_nonexistent(void *obj, void *data,
+QGuestAllocator *t_alloc)
+{
+QVirtio9P *v9p = obj;
+alloc = t_alloc;
+uint16_t nwqid;
+g_autofree v9fs_qid *wqid = NULL;
+g_autofree char *path = g_strdup_printf(
+QTEST_V9FS_SYNTH_WALK_FILE "/non-existent", 0
+);
+
+do_attach(v9p);
+do_walk_rqids(v9p, path, &nwqid, &wqid);
+/*
+ * The 9p2000 protocol spec says: "nwqid is therefore either nwname or the
+ * index of the first elementwise walk that failed."
+ */
+assert(nwqid == 1);
+}
+
 static void fs_walk_none(void *obj, void *data, QGuestAllocator *t_alloc)
 {
 QVirtio9P *v9p = obj;
@@ -1548,6 +1582,8 @@ static void register_virtio_9p_test(void)
  fs_walk_dotdot,  &opts);
 qos_add_test("synth/walk/non_existent", "virtio-9p", fs_walk_nonexistent,
   &opts);
+qos_add_test("synth/walk/2nd_non_existent", "virtio-9p",
+ fs_walk_2nd_nonexistent, &opts);
 qos_add_test("synth/lopen/basic", "virtio-9p", fs_lopen,  &opts);
 qos_add_test("synth/write/basic", "virtio-9p", fs_write,  &opts);
 qos_add_test("synth/flush/success", "virtio-9p", fs_flush_success,
-- 
2.30.2




[PULL 3/7] tests/9pfs: compare QIDs in fs_walk_none() test

2022-06-16 Thread Christian Schoenebeck
Extend previously added fs_walk_none() test by comparing the QID
of the root fid with the QID of the cloned fid. They should be
equal.

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
Message-Id: 
<5bbe9c6931b4600a9a23742f5ff2d38c1188237d.1647339025.git.qemu_...@crudebyte.com>
---
 tests/qtest/virtio-9p-test.c | 87 
 1 file changed, 87 insertions(+)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index 7942d5fef9..3c0f094929 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -371,8 +371,15 @@ static P9Req *v9fs_tattach(QVirtio9P *v9p, uint32_t fid, 
uint32_t n_uname,
 return req;
 }
 
+/* type[1] version[4] path[8] */
 typedef char v9fs_qid[13];
 
+static inline bool is_same_qid(v9fs_qid a, v9fs_qid b)
+{
+/* don't compare QID version for checking for file ID equalness */
+return a[0] == b[0] && memcmp(&a[5], &b[5], 8) == 0;
+}
+
 /* size[4] Rattach tag[2] qid[13] */
 static void v9fs_rattach(P9Req *req, v9fs_qid *qid)
 {
@@ -425,6 +432,79 @@ static void v9fs_rwalk(P9Req *req, uint16_t *nwqid, 
v9fs_qid **wqid)
 v9fs_req_free(req);
 }
 
+/* size[4] Tgetattr tag[2] fid[4] request_mask[8] */
+static P9Req *v9fs_tgetattr(QVirtio9P *v9p, uint32_t fid, uint64_t 
request_mask,
+uint16_t tag)
+{
+P9Req *req;
+
+req = v9fs_req_init(v9p, 4 + 8, P9_TGETATTR, tag);
+v9fs_uint32_write(req, fid);
+v9fs_uint64_write(req, request_mask);
+v9fs_req_send(req);
+return req;
+}
+
+typedef struct v9fs_attr {
+uint64_t valid;
+v9fs_qid qid;
+uint32_t mode;
+uint32_t uid;
+uint32_t gid;
+uint64_t nlink;
+uint64_t rdev;
+uint64_t size;
+uint64_t blksize;
+uint64_t blocks;
+uint64_t atime_sec;
+uint64_t atime_nsec;
+uint64_t mtime_sec;
+uint64_t mtime_nsec;
+uint64_t ctime_sec;
+uint64_t ctime_nsec;
+uint64_t btime_sec;
+uint64_t btime_nsec;
+uint64_t gen;
+uint64_t data_version;
+} v9fs_attr;
+
+#define P9_GETATTR_BASIC0x07ffULL /* Mask for fields up to BLOCKS */
+
+/*
+ * size[4] Rgetattr tag[2] valid[8] qid[13] mode[4] uid[4] gid[4] nlink[8]
+ *  rdev[8] size[8] blksize[8] blocks[8]
+ *  atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8]
+ *  ctime_sec[8] ctime_nsec[8] btime_sec[8] btime_nsec[8]
+ *  gen[8] data_version[8]
+ */
+static void v9fs_rgetattr(P9Req *req, v9fs_attr *attr)
+{
+v9fs_req_recv(req, P9_RGETATTR);
+
+v9fs_uint64_read(req, &attr->valid);
+v9fs_memread(req, &attr->qid, 13);
+v9fs_uint32_read(req, &attr->mode);
+v9fs_uint32_read(req, &attr->uid);
+v9fs_uint32_read(req, &attr->gid);
+v9fs_uint64_read(req, &attr->nlink);
+v9fs_uint64_read(req, &attr->rdev);
+v9fs_uint64_read(req, &attr->size);
+v9fs_uint64_read(req, &attr->blksize);
+v9fs_uint64_read(req, &attr->blocks);
+v9fs_uint64_read(req, &attr->atime_sec);
+v9fs_uint64_read(req, &attr->atime_nsec);
+v9fs_uint64_read(req, &attr->mtime_sec);
+v9fs_uint64_read(req, &attr->mtime_nsec);
+v9fs_uint64_read(req, &attr->ctime_sec);
+v9fs_uint64_read(req, &attr->ctime_nsec);
+v9fs_uint64_read(req, &attr->btime_sec);
+v9fs_uint64_read(req, &attr->btime_nsec);
+v9fs_uint64_read(req, &attr->gen);
+v9fs_uint64_read(req, &attr->data_version);
+
+v9fs_req_free(req);
+}
+
 /* size[4] Treaddir tag[2] fid[4] offset[8] count[4] */
 static P9Req *v9fs_treaddir(QVirtio9P *v9p, uint32_t fid, uint64_t offset,
 uint32_t count, uint16_t tag)
@@ -1009,6 +1089,7 @@ static void fs_walk_none(void *obj, void *data, 
QGuestAllocator *t_alloc)
 v9fs_qid root_qid;
 g_autofree v9fs_qid *wqid = NULL;
 P9Req *req;
+struct v9fs_attr attr;
 
 do_version(v9p);
 req = v9fs_tattach(v9p, 0, getuid(), 0);
@@ -1021,6 +1102,12 @@ static void fs_walk_none(void *obj, void *data, 
QGuestAllocator *t_alloc)
 
 /* special case: no QID is returned if nwname=0 was sent */
 g_assert(wqid == NULL);
+
+req = v9fs_tgetattr(v9p, 1, P9_GETATTR_BASIC, 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rgetattr(req, &attr);
+
+g_assert(is_same_qid(root_qid, attr.qid));
 }
 
 static void fs_walk_dotdot(void *obj, void *data, QGuestAllocator *t_alloc)
-- 
2.30.2




[PULL 2/7] tests/9pfs: Twalk with nwname=0

2022-06-16 Thread Christian Schoenebeck
Send Twalk request with nwname=0. In this case no QIDs should
be returned by 9p server; this is equivalent to walking to dot.

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
Message-Id: 

---
 tests/qtest/virtio-9p-test.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index b3837546be..7942d5fef9 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -1002,6 +1002,27 @@ static void fs_walk_nonexistent(void *obj, void *data, 
QGuestAllocator *t_alloc)
 do_walk_expect_error(v9p, "non-existent", ENOENT);
 }
 
+static void fs_walk_none(void *obj, void *data, QGuestAllocator *t_alloc)
+{
+QVirtio9P *v9p = obj;
+alloc = t_alloc;
+v9fs_qid root_qid;
+g_autofree v9fs_qid *wqid = NULL;
+P9Req *req;
+
+do_version(v9p);
+req = v9fs_tattach(v9p, 0, getuid(), 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rattach(req, &root_qid);
+
+req = v9fs_twalk(v9p, 0, 1, 0, NULL, 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rwalk(req, NULL, &wqid);
+
+/* special case: no QID is returned if nwname=0 was sent */
+g_assert(wqid == NULL);
+}
+
 static void fs_walk_dotdot(void *obj, void *data, QGuestAllocator *t_alloc)
 {
 QVirtio9P *v9p = obj;
@@ -1435,6 +1456,7 @@ static void register_virtio_9p_test(void)
 qos_add_test("synth/walk/basic", "virtio-9p", fs_walk,  &opts);
 qos_add_test("synth/walk/no_slash", "virtio-9p", fs_walk_no_slash,
   &opts);
+qos_add_test("synth/walk/none", "virtio-9p", fs_walk_none, &opts);
 qos_add_test("synth/walk/dotdot_from_root", "virtio-9p",
  fs_walk_dotdot,  &opts);
 qos_add_test("synth/walk/non_existent", "virtio-9p", fs_walk_nonexistent,
-- 
2.30.2




Re: [PATCH V8 06/39] cpr: reboot mode

2022-06-16 Thread Daniel P . Berrangé
On Wed, Jun 15, 2022 at 07:51:53AM -0700, Steve Sistare wrote:
> Provide the cpr-save and cpr-load functions for live update.  These save and
> restore VM state, with minimal guest pause time, so that qemu may be updated
> to a new version in between.
> 
> cpr-save stops the VM and saves vmstate to an ordinary file.  It supports
> any type of guest image and block device, but the caller must not modify
> guest block devices between cpr-save and cpr-load.
> 
> cpr-save supports several modes, the first of which is reboot. In this mode
> the caller invokes cpr-save and then terminates qemu.  The caller may then
> update the host kernel and system software and reboot.  The caller resumes
> the guest by running qemu with the same arguments as the original process
> and invoking cpr-load.  To use this mode, guest ram must be mapped to a
> persistent shared memory file such as /dev/dax0.0 or /dev/shm PKRAM.
> 
> The reboot mode supports vfio devices if the caller first suspends the
> guest, such as by issuing guest-suspend-ram to the qemu guest agent.  The
> guest drivers' suspend methods flush outstanding requests and re-initialize
> the devices, and thus there is no device state to save and restore.
> 
> cpr-load loads state from the file.  If the VM was running at cpr-save time
> then VM execution resumes.  If the VM was suspended at cpr-save time, then
> the caller must issue a system_wakeup command to resume.
> 
> cpr-save syntax:
>   { 'enum': 'CprMode', 'data': [ 'reboot' ] }
>   { 'command': 'cpr-save', 'data': { 'filename': 'str', 'mode': 'CprMode' }}
> 
> cpr-load syntax:
>   { 'command': 'cpr-load', 'data': { 'filename': 'str', 'mode': 'CprMode' }}

I'm still a little unsure if this direction for QAPI exposure is the
best, or whether we should instead leverage the migration commands.

I particularly concerned that we might regret having an API that
is designed only around storage in local files/blockdevs. The
migration layer has flexibility to use many protocols which has
been useful in the past to be able to offload work to an external
process. For example, libvirt uses migrate-to-fd so it can use
a helper that adds O_DIRECT support such that we avoid trashing
the host I/O cache for save/restore.

At the same time though, the migrate APIs don't currently support
a plain "file" protocol. This was because historically we needed
the QEMUFile to support O_NONBLOCK and this fails with plain
files or block devices, so QEMU threads could get blocked. For
the save side this doesn't matter so much, as QEMU now has the
outgoing migrate channels in blocking mode, only the incoming
side use non-blocking.  We could add a plain "file" protocol
to migration if we clearly document its limitations, and indeed
I've suggested we do that for another unrelated bit of work
for libvirts VM save/restore functionality.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [QEMU PATCH v2 1/6] tests/acpi: allow SSDT changes

2022-06-16 Thread Igor Mammedov
On Mon, 30 May 2022 11:40:42 +0800
Robert Hoo  wrote:

> Signed-off-by: Robert Hoo 
> Reviewed-by: Jingqi Liu 

Reviewed-by: Igor Mammedov 

> ---
>  tests/qtest/bios-tables-test-allowed-diff.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
> b/tests/qtest/bios-tables-test-allowed-diff.h
> index dfb8523c8b..eb8bae1407 100644
> --- a/tests/qtest/bios-tables-test-allowed-diff.h
> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> @@ -1 +1,3 @@
>  /* List of comma-separated changed AML files to ignore */
> +"tests/data/acpi/pc/SSDT.dimmpxm",
> +"tests/data/acpi/q35/SSDT.dimmpxm",




Re: [PATCH v3 11/17] migration/qemu-file: Fix qemu_ftell() for non-writable file

2022-06-16 Thread Daniel P . Berrangé
On Thu, Jun 16, 2022 at 01:28:05PM +0300, nikita.laps...@openvz.org wrote:
> From: Nikita Lapshin 
> 
> qemu_ftell() will return wrong value for non-writable QEMUFile.
> This happens due to call qemu_fflush() inside qemu_ftell(), this
> function won't flush if file is readable.

Well the return value isn't necessarily wrong today - it really
depends what semantics each callers desires.

Can you say what particular caller needs these semantics changed
and the impact on them from current behaviour ?

> Signed-off-by: Nikita Lapshin 
> ---
>  migration/qemu-file.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 1479cddad9..53ccef80ac 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -663,7 +663,8 @@ int64_t qemu_ftell_fast(QEMUFile *f)
>  int64_t qemu_ftell(QEMUFile *f)
>  {
>  qemu_fflush(f);
> -return f->pos;
> +/* Consider that qemu_fflush() won't work if file is non-writable */
> +return f->pos + f->buf_index;
>  }

IIUC, this is more or less trying to make 'qemu_ftell' be
equivalent to 'qemu_ftell_fast' semantics in the non-writable
case. But that makes me wonder if whichever calls has problems,
shouldn't be just changed to use  qemu_ftell_fast instead ?


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v2 1/2] hw/nvme: Implement shadow doorbell buffer support

2022-06-16 Thread Jinhao Fan
> On Jun 16, 2022, at 6:40 PM, Klaus Jensen  wrote:
> 
> This wont work for drivers that *do* rely on updating the buffer for
> admin queues, so we should read it regardless of the value of the queue
> id (since we are now updating it through the "Keith Hack^TM").

Sure. Sorry I forgot to update this.

> 
>> if (nvme_cq_full(cq)) {
>> break;
>> }
>> @@ -4237,6 +4248,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest 
>> *req)
>> static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
>>  uint16_t sqid, uint16_t cqid, uint16_t size)
>> {
>> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
> 
> You need to load the little endian value with ldq_le_p(&n->bar.cap).
> Sorry, didn't catch this in v1.

Thanks for pointing this out. Will send v3 soon.



[PATCH] hw/i386/postcard.c: New ISA POST card device

2022-06-16 Thread Lev Kujawski
Move the ioport80 memory functionality within hw/i386/pc.c into a new
ISA POST card device capable of being instantiated at different ports.

Emulate actual i440FX hardware by storing the value written to the
port, regardless of the presence of a POST card, a behavior relied
upon by firmware for these systems (e.g., AMIBIOS) to track the state
of the boot process.

To aid firmware debugging, allow for tracing POST card values,
mimicking the [new value, old value] display format of actual cards.

Tested with AMIBIOS.

Signed-off-by: Lev Kujawski 
---
 hw/i386/Kconfig|   5 ++
 hw/i386/meson.build|   1 +
 hw/i386/pc.c   |  25 +--
 hw/i386/postcard.c | 149 +
 hw/i386/trace-events   |   3 +
 include/hw/i386/postcard.h |  35 +
 6 files changed, 195 insertions(+), 23 deletions(-)
 create mode 100644 hw/i386/postcard.c
 create mode 100644 include/hw/i386/postcard.h

diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index d22ac4a4b9..17979e5c0d 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -10,6 +10,10 @@ config SGX
 bool
 depends on KVM
 
+config POST_CARD
+bool
+depends on ISA_BUS
+
 config PC
 bool
 imply APPLESMC
@@ -40,6 +44,7 @@ config PC
 select PCSPK
 select I8257
 select MC146818RTC
+select POST_CARD
 # For ACPI builder:
 select SERIAL_ISA
 select ACPI_PCI
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 213e2e82b3..c883e8ec9a 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -28,6 +28,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
   'port92.c'))
 i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
 if_false: 
files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_POST_CARD', if_true: files('postcard.c'))
 
 subdir('kvm')
 subdir('xen')
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 774cb2bf07..c179e38a61 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -32,6 +32,7 @@
 #include "hw/i386/topology.h"
 #include "hw/i386/fw_cfg.h"
 #include "hw/i386/vmport.h"
+#include "hw/i386/postcard.h"
 #include "sysemu/cpus.h"
 #include "hw/block/fdc.h"
 #include "hw/ide.h"
@@ -403,16 +404,6 @@ GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled)
 return s;
 }
 
-static void ioport80_write(void *opaque, hwaddr addr, uint64_t data,
-   unsigned size)
-{
-}
-
-static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size)
-{
-return 0xULL;
-}
-
 /* MSDOS compatibility mode FPU exception support */
 static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data,
unsigned size)
@@ -1059,16 +1050,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus 
*pci_bus)
 return dev;
 }
 
-static const MemoryRegionOps ioport80_io_ops = {
-.write = ioport80_write,
-.read = ioport80_read,
-.endianness = DEVICE_NATIVE_ENDIAN,
-.impl = {
-.min_access_size = 1,
-.max_access_size = 1,
-},
-};
-
 static const MemoryRegionOps ioportF0_io_ops = {
 .write = ioportF0_write,
 .read = ioportF0_read,
@@ -1139,12 +1120,10 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 qemu_irq pit_alt_irq = NULL;
 qemu_irq rtc_irq = NULL;
 ISADevice *pit = NULL;
-MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
 MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
 X86MachineState *x86ms = X86_MACHINE(pcms);
 
-memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, 
"ioport80", 1);
-memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
+(void)post_card_init(isa_bus, POST_CARD_PORT_DEFAULT);
 
 memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, 
"ioportF0", 1);
 memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io);
diff --git a/hw/i386/postcard.c b/hw/i386/postcard.c
new file mode 100644
index 00..c9fa263510
--- /dev/null
+++ b/hw/i386/postcard.c
@@ -0,0 +1,149 @@
+/*
+ * QEMU PC System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT S

Re: [QEMU PATCH v2 3/6] acpi/nvdimm: NVDIMM _DSM Spec supports revision 2

2022-06-16 Thread Igor Mammedov
On Mon, 30 May 2022 11:40:44 +0800
Robert Hoo  wrote:

> The Intel Optane PMem DSM Interface, Version 2.0 [1], is the up-to-date
> spec for NVDIMM _DSM definition, which supports revision_id == 2.
> 
> Nevertheless, Rev.2 of NVDIMM _DSM has no functional change on those Label
> Data _DSM Functions, which are the only ones implemented for vNVDIMM.
> So, simple change to support this revision_id == 2 case.
> 
> [1] https://pmem.io/documents/IntelOptanePMem_DSM_Interface-V2.0.pdf

pls enumerate functions that QEMU implement and that are supported by rev=2,
do we really need rev2 ?

also don't we need make sure that rev1 only function are excluded?
/spec above says, functions 3-6 are deprecated and limited to rev1 only/
"
Warning: This function has been deprecated in preference to the ACPI 6.2 _LSW 
(Label Storage Write)
NVDIMM Device Interface and is only supported with Arg1 – Revision Id = 1. It 
is included here for
backwards compatibility with existing Arg1 - Revision Id = 1 implementations.
"

> 
> Signed-off-by: Robert Hoo 
> Reviewed-by: Jingqi Liu 
> ---
>  hw/acpi/nvdimm.c | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
> index 0ab247a870..59b42afcf1 100644
> --- a/hw/acpi/nvdimm.c
> +++ b/hw/acpi/nvdimm.c
> @@ -849,9 +849,13 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t 
> val, unsigned size)
>  nvdimm_debug("Revision 0x%x Handler 0x%x Function 0x%x.\n", in->revision,
>   in->handle, in->function);
>  
> -if (in->revision != 0x1 /* Currently we only support DSM Spec Rev1. */) {
> -nvdimm_debug("Revision 0x%x is not supported, expect 0x%x.\n",
> - in->revision, 0x1);
> +/*
> + * Current NVDIMM _DSM Spec supports Rev1 and Rev2
> + * Intel® Optane Persistent Memory Module DSM Interface, Revision 2.0
> + */
> +if (in->revision != 0x1 && in->revision != 0x2) {
> +nvdimm_debug("Revision 0x%x is not supported, expect 0x1 or 0x2.\n",
> + in->revision);
>  nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr);
>  goto exit;
>  }




Re: Corrupted display changing screen colour depth in qemu-system-ppc/MacOS

2022-06-16 Thread Gerd Hoffmann
On Thu, Jun 16, 2022 at 09:54:46AM +0100, Mark Cave-Ayland wrote:
> Hi all,
> 
> Howard pointed me off-list to a corrupted screen display issue experienced
> when changing the screen colour depth in MacOS under qemu-system-ppc. I was
> able to reproduce it here, and noticed from the output that the issue was
> likely due to the host display not updating its depth accordingly.
> 
> After it was confirmed to me that this was working in QEMU 6.2, I was able
> to eventually bisect the problem down to this commit:
> 
> 
> cb8962c146b2633a4b04562281de9b2703bba849 is the first bad commit
> commit cb8962c146b2633a4b04562281de9b2703bba849
> Author: Marc-André Lureau 
> Date:   Tue Feb 15 00:13:37 2022 +0400
> 
> ui: do not create a surface when resizing a GL scanout
> 
> qemu_console_resize() will create a blank surface and replace the
> current scanout with it if called while the current scanout is
> GL (texture or dmabuf).
> 
> This is not only very costly, but also can produce glitches on the
> display/listener side.
> 
> Instead, compare the current console size with the fitting console
> functions, which also works when the scanout is GL.
> 
> Note: there might be still an unnecessary surface creation on calling
> qemu_console_resize() when the size is actually changing, but display
> backends currently rely on DisplaySurface details during
> dpy_gfx_switch() to handle various resize aspects. We would need more
> refactoring to handle resize without DisplaySurface, this is left for a
> future improvement.
> 
> Signed-off-by: Marc-André Lureau 
> Message-Id: <20220214201337.1814787-4-marcandre.lur...@redhat.com>
> Signed-off-by: Gerd Hoffmann 
> 
>  ui/console.c | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> 
> Some more background: the screen in qemu-system-ppc's MacOS is controlled
> via a custom driver written by Ben which uses the Bochs VBE registers to
> change the screen width/height/depth. The code used to do this can be found
> at 
> https://gitlab.com/qemu-project/QemuMacDrivers/-/blob/master/QemuVGADriver/src/QemuVga.c#L354.
> 
> Looking at the changes in cb8962c146 my guess would be that either the
> updated check in qemu_console_resize() should also check to see if the
> surface depth is unchanged before exiting early, or that there is an extra
> update required in the VGA device when changing just the screen colour depth
> by itself.

Can you try ditch the QEMU_ALLOCATED_FLAG check added by the commit?

Which depth changes triggers this?  Going from direct color to a
paletted mode?

take care,
  Gerd




[PATCH v17 02/13] linux-user: Add LoongArch signal support

2022-06-16 Thread Song Gao
Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---
 linux-user/loongarch64/signal.c| 312 +
 linux-user/loongarch64/target_signal.h |  13 ++
 2 files changed, 325 insertions(+)
 create mode 100644 linux-user/loongarch64/signal.c
 create mode 100644 linux-user/loongarch64/target_signal.h

diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
new file mode 100644
index 00..e291d72f85
--- /dev/null
+++ b/linux-user/loongarch64/signal.c
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch emulation of Linux signals
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "user-internals.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+#include "target/loongarch/internals.h"
+
+/* FP context was used */
+#define SC_USED_FP  (1 << 0)
+
+struct target_sigcontext {
+uint64_t sc_pc;
+uint64_t sc_regs[32];
+uint32_t sc_flags;
+uint64_t sc_extcontext[0] __attribute__((aligned(16)));
+};
+
+struct target_fpu_context {
+uint64_t regs[32];
+uint64_t fcc;
+uint32_t fcsr;
+};
+
+struct target_sctx_info {
+uint32_t magic;
+uint32_t size;
+uint64_t padding;  /* padding to 16 bytes */
+};
+
+struct target_ucontext {
+target_ulong tuc_flags;
+struct target_ucontext *tuc_link;
+target_stack_t tuc_stack;
+target_sigset_t tuc_sigmask;
+uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)];
+struct target_sigcontext tuc_mcontext;
+};
+
+struct target_rt_sigframe {
+struct target_siginfo rs_info;
+struct target_ucontext rs_uc;
+};
+
+struct ctx_layout {
+abi_ulong addr;
+unsigned int size;
+};
+
+struct extctx_layout {
+unsigned int size;
+unsigned int flags;
+struct ctx_layout fpu;
+struct ctx_layout end;
+};
+
+static uint64_t read_all_fcc(CPULoongArchState *env)
+{
+uint64_t ret = 0;
+
+for (int i = 0; i < 8; ++i) {
+ret |= (uint64_t)env->cf[i] << (i * 8);
+}
+
+return ret;
+}
+
+static void write_all_fcc(CPULoongArchState *env, uint64_t val)
+{
+for (int i = 0; i < 8; ++i) {
+env->cf[i] = (val >> (i * 8)) & 1;
+}
+}
+
+static void *get_ctx(struct target_sctx_info *info)
+{
+return (char *)info + sizeof(struct target_sctx_info);
+}
+
+static void copy_fpu_to_sigcontext(CPULoongArchState *env,
+   struct extctx_layout *extctx)
+{
+int i;
+struct target_sctx_info *info = (struct target_sctx_info 
*)extctx->fpu.addr;
+struct target_fpu_context *fpu_ctx = get_ctx(info);
+
+for (i = 1; i < 32; ++i) {
+__put_user(env->fpr[i], &fpu_ctx->regs[i]);
+}
+
+fpu_ctx->fcc = read_all_fcc(env);
+__put_user(env->fcsr0, &fpu_ctx->fcsr);
+__put_user(extctx->fpu.size, &info->size);
+}
+
+static abi_ulong extframe_alloc(struct extctx_layout *extctx,
+struct ctx_layout *layout,
+size_t size, abi_ulong base)
+{
+abi_ulong new_base = base - size;
+
+new_base -= sizeof(struct target_sctx_info);
+layout->addr = new_base;
+layout->size = (unsigned int)(base - new_base);
+extctx->size += layout->size;
+
+return new_base;
+}
+
+static abi_ulong setup_extcontext(struct extctx_layout *extctx, abi_ulong sp)
+{
+abi_ulong new_sp = sp;
+
+memset(extctx, 0, sizeof(struct extctx_layout));
+new_sp -= sizeof(struct target_sctx_info);
+
+extctx->end.addr = new_sp;
+extctx->end.size = (unsigned int)sizeof(struct target_sctx_info);
+extctx->size += extctx->end.size;
+extctx->flags = SC_USED_FP;
+
+new_sp = extframe_alloc(extctx, &extctx->fpu,
+sizeof(struct target_fpu_context), new_sp);
+
+return new_sp;
+}
+
+static void setup_sigcontext(CPULoongArchState *env,
+ struct target_sigcontext *sc,
+ struct extctx_layout *extctx)
+{
+int i;
+
+__put_user(extctx->flags, &sc->sc_flags);
+__put_user(env->pc, &sc->sc_pc);
+
+for (i = 1; i < 32; ++i) {
+__put_user(env->gpr[i], &sc->sc_regs[i]);
+}
+
+copy_fpu_to_sigcontext(env, extctx);
+}
+
+static void copy_fpu_from_sigcontext(CPULoongArchState *env,
+ struct extctx_layout *extctx)
+{
+int i;
+struct target_sctx_info *info = (struct target_sctx_info 
*)extctx->fpu.addr;
+struct target_fpu_context *fpu_ctx = get_ctx(info);
+
+for (i = 1; i < 32; ++i) {
+__get_user(env->fpr[i], &fpu_ctx->regs[i]);
+}
+write_all_fcc(env, fpu_ctx->fcc);
+__get_user(env->fcsr0, &fpu_ctx->fcsr);
+}
+
+static int parse_extcontext(struct target_sigcontext *sc,
+ struct extctx_layout *extctx)
+{
+unsigned int size;
+struct target_sctx_info *info = (struct target_sctx_info *)
+

[PATCH v17 01/13] linux-user: Add LoongArch generic header files

2022-06-16 Thread Song Gao
This includes:
- sockbits.h
- target_errno_defs.h
- target_fcntl.h
- termbits.h
- target_resource.h
- target_structs.h

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: WANG Xuerui 
---
 linux-user/loongarch64/sockbits.h  | 11 +++
 linux-user/loongarch64/target_errno_defs.h | 12 
 linux-user/loongarch64/target_fcntl.h  | 11 +++
 linux-user/loongarch64/target_prctl.h  |  1 +
 linux-user/loongarch64/target_resource.h   | 11 +++
 linux-user/loongarch64/target_structs.h| 11 +++
 linux-user/loongarch64/termbits.h  | 11 +++
 7 files changed, 68 insertions(+)
 create mode 100644 linux-user/loongarch64/sockbits.h
 create mode 100644 linux-user/loongarch64/target_errno_defs.h
 create mode 100644 linux-user/loongarch64/target_fcntl.h
 create mode 100644 linux-user/loongarch64/target_prctl.h
 create mode 100644 linux-user/loongarch64/target_resource.h
 create mode 100644 linux-user/loongarch64/target_structs.h
 create mode 100644 linux-user/loongarch64/termbits.h

diff --git a/linux-user/loongarch64/sockbits.h 
b/linux-user/loongarch64/sockbits.h
new file mode 100644
index 00..1cffcae120
--- /dev/null
+++ b/linux-user/loongarch64/sockbits.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_SOCKBITS_H
+#define LOONGARCH_TARGET_SOCKBITS_H
+
+#include "../generic/sockbits.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_errno_defs.h 
b/linux-user/loongarch64/target_errno_defs.h
new file mode 100644
index 00..c198b8aca9
--- /dev/null
+++ b/linux-user/loongarch64/target_errno_defs.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_ERRNO_DEFS_H
+#define LOONGARCH_TARGET_ERRNO_DEFS_H
+
+/* Target uses generic errno */
+#include "../generic/target_errno_defs.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_fcntl.h 
b/linux-user/loongarch64/target_fcntl.h
new file mode 100644
index 00..99bf586854
--- /dev/null
+++ b/linux-user/loongarch64/target_fcntl.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_FCNTL_H
+#define LOONGARCH_TARGET_FCNTL_H
+
+#include "../generic/fcntl.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_prctl.h 
b/linux-user/loongarch64/target_prctl.h
new file mode 100644
index 00..eb53b31ad5
--- /dev/null
+++ b/linux-user/loongarch64/target_prctl.h
@@ -0,0 +1 @@
+/* No special prctl support required. */
diff --git a/linux-user/loongarch64/target_resource.h 
b/linux-user/loongarch64/target_resource.h
new file mode 100644
index 00..0f86bf24ee
--- /dev/null
+++ b/linux-user/loongarch64/target_resource.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_RESOURCE_H
+#define LOONGARCH_TARGET_RESOURCE_H
+
+#include "../generic/target_resource.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_structs.h 
b/linux-user/loongarch64/target_structs.h
new file mode 100644
index 00..6041441e15
--- /dev/null
+++ b/linux-user/loongarch64/target_structs.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_STRUCTS_H
+#define LOONGARCH_TARGET_STRUCTS_H
+
+#include "../generic/target_structs.h"
+
+#endif
diff --git a/linux-user/loongarch64/termbits.h 
b/linux-user/loongarch64/termbits.h
new file mode 100644
index 00..d425db8748
--- /dev/null
+++ b/linux-user/loongarch64/termbits.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_TERMBITS_H
+#define LOONGARCH_TARGET_TERMBITS_H
+
+#include "../generic/termbits.h"
+
+#endif
-- 
2.31.1




[PATCH v17 04/13] linux-user: Add LoongArch syscall support

2022-06-16 Thread Song Gao
Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/loongarch64/syscall_nr.h | 312 
 linux-user/loongarch64/target_syscall.h |  48 
 linux-user/syscall_defs.h   |   6 +-
 scripts/gensyscalls.sh  |   2 +
 4 files changed, 367 insertions(+), 1 deletion(-)
 create mode 100644 linux-user/loongarch64/syscall_nr.h
 create mode 100644 linux-user/loongarch64/target_syscall.h

diff --git a/linux-user/loongarch64/syscall_nr.h 
b/linux-user/loongarch64/syscall_nr.h
new file mode 100644
index 00..be00915adf
--- /dev/null
+++ b/linux-user/loongarch64/syscall_nr.h
@@ -0,0 +1,312 @@
+/*
+ * This file contains the system call numbers.
+ * Do not modify.
+ * This file is generated by scripts/gensyscalls.sh
+ */
+#ifndef LINUX_USER_LOONGARCH_SYSCALL_NR_H
+#define LINUX_USER_LOONGARCH_SYSCALL_NR_H
+
+#define TARGET_NR_io_setup 0
+#define TARGET_NR_io_destroy 1
+#define TARGET_NR_io_submit 2
+#define TARGET_NR_io_cancel 3
+#define TARGET_NR_io_getevents 4
+#define TARGET_NR_setxattr 5
+#define TARGET_NR_lsetxattr 6
+#define TARGET_NR_fsetxattr 7
+#define TARGET_NR_getxattr 8
+#define TARGET_NR_lgetxattr 9
+#define TARGET_NR_fgetxattr 10
+#define TARGET_NR_listxattr 11
+#define TARGET_NR_llistxattr 12
+#define TARGET_NR_flistxattr 13
+#define TARGET_NR_removexattr 14
+#define TARGET_NR_lremovexattr 15
+#define TARGET_NR_fremovexattr 16
+#define TARGET_NR_getcwd 17
+#define TARGET_NR_lookup_dcookie 18
+#define TARGET_NR_eventfd2 19
+#define TARGET_NR_epoll_create1 20
+#define TARGET_NR_epoll_ctl 21
+#define TARGET_NR_epoll_pwait 22
+#define TARGET_NR_dup 23
+#define TARGET_NR_dup3 24
+#define TARGET_NR_fcntl 25
+#define TARGET_NR_inotify_init1 26
+#define TARGET_NR_inotify_add_watch 27
+#define TARGET_NR_inotify_rm_watch 28
+#define TARGET_NR_ioctl 29
+#define TARGET_NR_ioprio_set 30
+#define TARGET_NR_ioprio_get 31
+#define TARGET_NR_flock 32
+#define TARGET_NR_mknodat 33
+#define TARGET_NR_mkdirat 34
+#define TARGET_NR_unlinkat 35
+#define TARGET_NR_symlinkat 36
+#define TARGET_NR_linkat 37
+#define TARGET_NR_umount2 39
+#define TARGET_NR_mount 40
+#define TARGET_NR_pivot_root 41
+#define TARGET_NR_nfsservctl 42
+#define TARGET_NR_statfs 43
+#define TARGET_NR_fstatfs 44
+#define TARGET_NR_truncate 45
+#define TARGET_NR_ftruncate 46
+#define TARGET_NR_fallocate 47
+#define TARGET_NR_faccessat 48
+#define TARGET_NR_chdir 49
+#define TARGET_NR_fchdir 50
+#define TARGET_NR_chroot 51
+#define TARGET_NR_fchmod 52
+#define TARGET_NR_fchmodat 53
+#define TARGET_NR_fchownat 54
+#define TARGET_NR_fchown 55
+#define TARGET_NR_openat 56
+#define TARGET_NR_close 57
+#define TARGET_NR_vhangup 58
+#define TARGET_NR_pipe2 59
+#define TARGET_NR_quotactl 60
+#define TARGET_NR_getdents64 61
+#define TARGET_NR_lseek 62
+#define TARGET_NR_read 63
+#define TARGET_NR_write 64
+#define TARGET_NR_readv 65
+#define TARGET_NR_writev 66
+#define TARGET_NR_pread64 67
+#define TARGET_NR_pwrite64 68
+#define TARGET_NR_preadv 69
+#define TARGET_NR_pwritev 70
+#define TARGET_NR_sendfile 71
+#define TARGET_NR_pselect6 72
+#define TARGET_NR_ppoll 73
+#define TARGET_NR_signalfd4 74
+#define TARGET_NR_vmsplice 75
+#define TARGET_NR_splice 76
+#define TARGET_NR_tee 77
+#define TARGET_NR_readlinkat 78
+#define TARGET_NR_sync 81
+#define TARGET_NR_fsync 82
+#define TARGET_NR_fdatasync 83
+#define TARGET_NR_sync_file_range 84
+#define TARGET_NR_timerfd_create 85
+#define TARGET_NR_timerfd_settime 86
+#define TARGET_NR_timerfd_gettime 87
+#define TARGET_NR_utimensat 88
+#define TARGET_NR_acct 89
+#define TARGET_NR_capget 90
+#define TARGET_NR_capset 91
+#define TARGET_NR_personality 92
+#define TARGET_NR_exit 93
+#define TARGET_NR_exit_group 94
+#define TARGET_NR_waitid 95
+#define TARGET_NR_set_tid_address 96
+#define TARGET_NR_unshare 97
+#define TARGET_NR_futex 98
+#define TARGET_NR_set_robust_list 99
+#define TARGET_NR_get_robust_list 100
+#define TARGET_NR_nanosleep 101
+#define TARGET_NR_getitimer 102
+#define TARGET_NR_setitimer 103
+#define TARGET_NR_kexec_load 104
+#define TARGET_NR_init_module 105
+#define TARGET_NR_delete_module 106
+#define TARGET_NR_timer_create 107
+#define TARGET_NR_timer_gettime 108
+#define TARGET_NR_timer_getoverrun 109
+#define TARGET_NR_timer_settime 110
+#define TARGET_NR_timer_delete 111
+#define TARGET_NR_clock_settime 112
+#define TARGET_NR_clock_gettime 113
+#define TARGET_NR_clock_getres 114
+#define TARGET_NR_clock_nanosleep 115
+#define TARGET_NR_syslog 116
+#define TARGET_NR_ptrace 117
+#define TARGET_NR_sched_setparam 118
+#define TARGET_NR_sched_setscheduler 119
+#define TARGET_NR_sched_getscheduler 120
+#define TARGET_NR_sched_getparam 121
+#define TARGET_NR_sched_setaffinity 122
+#define TARGET_NR_sched_getaffinity 123
+#define TARGET_NR_sched_yield 124
+#define TARGET_NR_sched_get_priority_max 125
+#define TARGET_NR_sched_get_priority_min 126
+#define TARGET_NR_sc

[PATCH v17 03/13] linux-user: Add LoongArch elf support

2022-06-16 Thread Song Gao
Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/elfload.c| 91 +
 linux-user/loongarch64/target_elf.h | 12 
 2 files changed, 103 insertions(+)
 create mode 100644 linux-user/loongarch64/target_elf.h

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f7eae357f4..7351d0e089 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -918,6 +918,97 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUPPCState *en
 
 #endif
 
+#ifdef TARGET_LOONGARCH64
+
+#define ELF_START_MMAP 0x8000
+
+#define ELF_CLASS   ELFCLASS64
+#define ELF_ARCHEM_LOONGARCH
+
+#define elf_check_arch(x) ((x) == EM_LOONGARCH)
+
+static inline void init_thread(struct target_pt_regs *regs,
+   struct image_info *infop)
+{
+/*Set crmd PG,DA = 1,0 */
+regs->csr.crmd = 2 << 3;
+regs->csr.era = infop->entry;
+regs->regs[3] = infop->start_stack;
+}
+
+/* See linux kernel: arch/loongarch/include/asm/elf.h */
+#define ELF_NREG 45
+typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];
+
+enum {
+TARGET_EF_R0 = 0,
+TARGET_EF_CSR_ERA = TARGET_EF_R0 + 33,
+TARGET_EF_CSR_BADV = TARGET_EF_R0 + 34,
+};
+
+static void elf_core_copy_regs(target_elf_gregset_t *regs,
+   const CPULoongArchState *env)
+{
+int i;
+
+(*regs)[TARGET_EF_R0] = 0;
+
+for (i = 1; i < ARRAY_SIZE(env->gpr); i++) {
+(*regs)[TARGET_EF_R0 + i] = tswapreg(env->gpr[i]);
+}
+
+(*regs)[TARGET_EF_CSR_ERA] = tswapreg(env->pc);
+(*regs)[TARGET_EF_CSR_BADV] = tswapreg(env->CSR_BADV);
+}
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE4096
+
+#define ELF_HWCAP get_elf_hwcap()
+
+/* See arch/loongarch/include/uapi/asm/hwcap.h */
+enum {
+HWCAP_LOONGARCH_CPUCFG   = (1 << 0),
+HWCAP_LOONGARCH_LAM  = (1 << 1),
+HWCAP_LOONGARCH_UAL  = (1 << 2),
+HWCAP_LOONGARCH_FPU  = (1 << 3),
+HWCAP_LOONGARCH_LSX  = (1 << 4),
+HWCAP_LOONGARCH_LASX = (1 << 5),
+HWCAP_LOONGARCH_CRC32= (1 << 6),
+HWCAP_LOONGARCH_COMPLEX  = (1 << 7),
+HWCAP_LOONGARCH_CRYPTO   = (1 << 8),
+HWCAP_LOONGARCH_LVZ  = (1 << 9),
+HWCAP_LOONGARCH_LBT_X86  = (1 << 10),
+HWCAP_LOONGARCH_LBT_ARM  = (1 << 11),
+HWCAP_LOONGARCH_LBT_MIPS = (1 << 12),
+};
+
+static uint32_t get_elf_hwcap(void)
+{
+LoongArchCPU *cpu = LOONGARCH_CPU(thread_cpu);
+uint32_t hwcaps = 0;
+
+hwcaps |= HWCAP_LOONGARCH_CRC32;
+
+if (FIELD_EX32(cpu->env.cpucfg[1], CPUCFG1, UAL)) {
+hwcaps |= HWCAP_LOONGARCH_UAL;
+}
+
+if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, FP)) {
+hwcaps |= HWCAP_LOONGARCH_FPU;
+}
+
+if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LAM)) {
+hwcaps |= HWCAP_LOONGARCH_LAM;
+}
+
+return hwcaps;
+}
+
+#define ELF_PLATFORM "loongarch"
+
+#endif /* TARGET_LOONGARCH64 */
+
 #ifdef TARGET_MIPS
 
 #define ELF_START_MMAP 0x8000
diff --git a/linux-user/loongarch64/target_elf.h 
b/linux-user/loongarch64/target_elf.h
new file mode 100644
index 00..95c3f05a46
--- /dev/null
+++ b/linux-user/loongarch64/target_elf.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_ELF_H
+#define LOONGARCH_TARGET_ELF_H
+static inline const char *cpu_get_model(uint32_t eflags)
+{
+return "la464";
+}
+#endif
-- 
2.31.1




[PATCH v17 00/13] Add LoongArch linux-user emulation support

2022-06-16 Thread Song Gao
Hi All,

This series add support linux-user emulation.
As the LoongArch kernel had merged into 5.19-rc1,
you can see the latest kernel at https://kernel.org

Need review patches:

  0002-linux-user-Add-LoongArch-signal-support.patch
  0007-target-loongarch-remove-badaddr-from-CPULoongArch.patch
  0008-target-loongarch-Fix-missing-update-CSR_BADV.patch
  0009-target-loongarch-Fix-helper_asrtle_d-asrtgt_d-raise-.patch
  0010-target-loongarch-remove-unused-include-hw-loader.h.patch
  0011-target-loongarch-Adjust-functions-and-structure-to-s.patch


V17:
  - Split v16 patch7 to  patch7-11, and fix some bugs for system-mode;
  - Update signal.c, add parse_extcontext();
  - Add get_elf_hwcap(), and ELF_PLATFORM.

V16:
  - Update signal.c;
  - Update helper_rdtime_d();
  - Update scripts/gensyscalls.sh, fixed a warning.

v15:
  - Rebase;
  - Update README;
  - Adjust some functions and structure to support user-mode;
  - Update syscall;
  - Update target_sigcontext;

Old series:
   - https://patchew.org/QEMU/20220614090536.1103616-1-gaos...@loongson.cn/

Test:
   - user-mode:
   make check  && make check-tcg  &&  run LoongArch bash
   - system-mode
   make check  && make check-tcg

Thanks.
Song Gao


Song Gao (13):
  linux-user: Add LoongArch generic header files
  linux-user: Add LoongArch signal support
  linux-user: Add LoongArch elf support
  linux-user: Add LoongArch syscall support
  linux-user: Add LoongArch cpu_loop support
  scripts: add loongarch64 binfmt config
  target/loongarch: remove badaddr from CPULoongArch
  target/loongarch: Fix missing update CSR_BADV
  target/loongarch: Fix helper_asrtle_d/asrtgt_d raise wrong exception
  target/loongarch: remove unused include hw/loader.h
  target/loongarch: Adjust functions and structure to support user-mode
  default-configs: Add loongarch linux-user support
  target/loongarch: Update README

 configs/targets/loongarch64-linux-user.mak|   3 +
 linux-user/elfload.c  |  91 +
 linux-user/loongarch64/cpu_loop.c |  96 ++
 linux-user/loongarch64/signal.c   | 312 ++
 linux-user/loongarch64/sockbits.h |  11 +
 linux-user/loongarch64/syscall_nr.h   | 312 ++
 linux-user/loongarch64/target_cpu.h   |  34 ++
 linux-user/loongarch64/target_elf.h   |  12 +
 linux-user/loongarch64/target_errno_defs.h|  12 +
 linux-user/loongarch64/target_fcntl.h |  11 +
 linux-user/loongarch64/target_prctl.h |   1 +
 linux-user/loongarch64/target_resource.h  |  11 +
 linux-user/loongarch64/target_signal.h|  13 +
 linux-user/loongarch64/target_structs.h   |  11 +
 linux-user/loongarch64/target_syscall.h   |  48 +++
 linux-user/loongarch64/termbits.h |  11 +
 linux-user/syscall_defs.h |   6 +-
 scripts/gensyscalls.sh|   2 +
 scripts/qemu-binfmt-conf.sh   |   6 +-
 target/loongarch/README   |  39 ++-
 target/loongarch/cpu.c|  34 +-
 target/loongarch/cpu.h|   8 +-
 target/loongarch/gdbstub.c|   2 +-
 target/loongarch/helper.h |   2 +
 .../insn_trans/trans_privileged.c.inc |  36 ++
 target/loongarch/internals.h  |   2 +
 target/loongarch/op_helper.c  |  10 +-
 27 files changed, 1121 insertions(+), 15 deletions(-)
 create mode 100644 configs/targets/loongarch64-linux-user.mak
 create mode 100644 linux-user/loongarch64/cpu_loop.c
 create mode 100644 linux-user/loongarch64/signal.c
 create mode 100644 linux-user/loongarch64/sockbits.h
 create mode 100644 linux-user/loongarch64/syscall_nr.h
 create mode 100644 linux-user/loongarch64/target_cpu.h
 create mode 100644 linux-user/loongarch64/target_elf.h
 create mode 100644 linux-user/loongarch64/target_errno_defs.h
 create mode 100644 linux-user/loongarch64/target_fcntl.h
 create mode 100644 linux-user/loongarch64/target_prctl.h
 create mode 100644 linux-user/loongarch64/target_resource.h
 create mode 100644 linux-user/loongarch64/target_signal.h
 create mode 100644 linux-user/loongarch64/target_structs.h
 create mode 100644 linux-user/loongarch64/target_syscall.h
 create mode 100644 linux-user/loongarch64/termbits.h

-- 
2.31.1




Re: [PATCH] hw/riscv: virt: pass random seed to fdt

2022-06-16 Thread Alistair Francis
On Thu, Jun 16, 2022 at 8:01 PM Jason A. Donenfeld  wrote:
>
> Hi Alistair,
>
> On Thu, Jun 16, 2022 at 12:32:36PM +1000, Alistair Francis wrote:
> > Applied to riscv-to-apply.next with the full stop removed
>
> Great, thanks. Just wondering: am I looking in the right repo? I don't
> see it here: https://github.com/alistair23/qemu/commits/riscv-to-apply.next

That's the right repo, I just have to push the latest updates. You
should see it there tomorrow

Alistair

>
> Jason



[PATCH v17 11/13] target/loongarch: Adjust functions and structure to support user-mode

2022-06-16 Thread Song Gao
Some functions and member of the structure are different with softmmu-mode
So we need adjust them to support user-mode.

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---
 target/loongarch/cpu.c| 21 ++-
 target/loongarch/cpu.h|  6 
 target/loongarch/helper.h |  2 ++
 .../insn_trans/trans_privileged.c.inc | 36 +++
 target/loongarch/internals.h  |  2 ++
 target/loongarch/op_helper.c  |  6 
 6 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index bf163a8dce..47c0bdd1ac 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -82,6 +82,7 @@ static void loongarch_cpu_set_pc(CPUState *cs, vaddr value)
 env->pc = value;
 }
 
+#ifndef CONFIG_USER_ONLY
 #include "hw/loongarch/virt.h"
 
 void loongarch_cpu_set_irq(void *opaque, int irq, int level)
@@ -295,6 +296,7 @@ static bool loongarch_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 }
 return false;
 }
+#endif
 
 #ifdef CONFIG_TCG
 static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
@@ -309,6 +311,9 @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
 
 static bool loongarch_cpu_has_work(CPUState *cs)
 {
+#ifdef CONFIG_USER_ONLY
+return true;
+#else
 LoongArchCPU *cpu = LOONGARCH_CPU(cs);
 CPULoongArchState *env = &cpu->env;
 bool has_work = false;
@@ -319,6 +324,7 @@ static bool loongarch_cpu_has_work(CPUState *cs)
 }
 
 return has_work;
+#endif
 }
 
 static void loongarch_la464_initfn(Object *obj)
@@ -467,7 +473,9 @@ static void loongarch_cpu_reset(DeviceState *dev)
 env->CSR_DMW[n] = FIELD_DP64(env->CSR_DMW[n], CSR_DMW, PLV3, 0);
 }
 
+#ifndef CONFIG_USER_ONLY
 env->pc = 0x1c00;
+#endif
 
 restore_fp_status(env);
 cs->exception_index = -1;
@@ -498,6 +506,7 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error 
**errp)
 lacc->parent_realize(dev, errp);
 }
 
+#ifndef CONFIG_USER_ONLY
 static void loongarch_qemu_write(void *opaque, hwaddr addr,
  uint64_t val, unsigned size)
 {
@@ -532,13 +541,16 @@ static const MemoryRegionOps loongarch_qemu_ops = {
 .max_access_size = 8,
 },
 };
+#endif
 
 static void loongarch_cpu_init(Object *obj)
 {
 LoongArchCPU *cpu = LOONGARCH_CPU(obj);
-CPULoongArchState *env = &cpu->env;
 
 cpu_set_cpustate_pointers(cpu);
+
+#ifndef CONFIG_USER_ONLY
+CPULoongArchState *env = &cpu->env;
 qdev_init_gpio_in(DEVICE(cpu), loongarch_cpu_set_irq, N_IRQS);
 timer_init_ns(&cpu->timer, QEMU_CLOCK_VIRTUAL,
   &loongarch_constant_timer_cb, cpu);
@@ -548,6 +560,7 @@ static void loongarch_cpu_init(Object *obj)
 memory_region_init_io(&env->iocsr_mem, OBJECT(cpu), &loongarch_qemu_ops,
   NULL, "iocsr_misc", 0x428);
 memory_region_add_subregion(&env->system_iocsr, 0, &env->iocsr_mem);
+#endif
 }
 
 static ObjectClass *loongarch_cpu_class_by_name(const char *cpu_model)
@@ -615,18 +628,22 @@ static struct TCGCPUOps loongarch_tcg_ops = {
 .initialize = loongarch_translate_init,
 .synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
 
+#ifndef CONFIG_USER_ONLY
 .tlb_fill = loongarch_cpu_tlb_fill,
 .cpu_exec_interrupt = loongarch_cpu_exec_interrupt,
 .do_interrupt = loongarch_cpu_do_interrupt,
 .do_transaction_failed = loongarch_cpu_do_transaction_failed,
+#endif
 };
 #endif /* CONFIG_TCG */
 
+#ifndef CONFIG_USER_ONLY
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps loongarch_sysemu_ops = {
 .get_phys_page_debug = loongarch_cpu_get_phys_page_debug,
 };
+#endif
 
 static void loongarch_cpu_class_init(ObjectClass *c, void *data)
 {
@@ -642,8 +659,10 @@ static void loongarch_cpu_class_init(ObjectClass *c, void 
*data)
 cc->has_work = loongarch_cpu_has_work;
 cc->dump_state = loongarch_cpu_dump_state;
 cc->set_pc = loongarch_cpu_set_pc;
+#ifndef CONFIG_USER_ONLY
 dc->vmsd = &vmstate_loongarch_cpu;
 cc->sysemu_ops = &loongarch_sysemu_ops;
+#endif
 cc->disas_set_info = loongarch_cpu_disas_set_info;
 cc->gdb_read_register = loongarch_cpu_gdb_read_register;
 cc->gdb_write_register = loongarch_cpu_gdb_write_register;
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 4b4fbcdc71..d141ec9b5d 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -301,6 +301,7 @@ typedef struct CPUArchState {
 uint64_t CSR_DERA;
 uint64_t CSR_DSAVE;
 
+#ifndef CONFIG_USER_ONLY
 LoongArchTLB  tlb[LOONGARCH_TLB_MAX];
 
 AddressSpace address_space_iocsr;
@@ -308,6 +309,7 @@ typedef struct CPUArchState {
 MemoryRegion iocsr_mem;
 bool load_elf;
 uint64_t elf_address;
+#endif
 } CPULoongArchState;
 
 /**
@@ -358,12 +360,16 @@ struct LoongArchCPUClass {
 
 static inline int cpu_mmu_index(CPULoongArchState *env, boo

[PATCH v17 13/13] target/loongarch: Update README

2022-06-16 Thread Song Gao
Add linux-user emulation introduction

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 target/loongarch/README | 39 +--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/target/loongarch/README b/target/loongarch/README
index 4dcd0f1682..9f5edd10c8 100644
--- a/target/loongarch/README
+++ b/target/loongarch/README
@@ -24,9 +24,9 @@
 
 Download cross-tools.
 
-  wget 
https://github.com/loongson/build-tools/releases/latest/download/loongarch64-clfs-20211202-cross-tools.tar.xz
+  wget 
https://github.com/loongson/build-tools/releases/download/2022.05.29/loongarch64-clfs-5.0-cross-tools-gcc-full.tar.xz
 
-  tar -vxf loongarch64-clfs-20211202-cross-tools.tar.xz -C /opt
+  tar -vxf loongarch64-clfs-5.0-cross-tools-gcc-full.tar.xz -C /opt
 
 Config cross-tools env.
 
@@ -60,5 +60,40 @@
 
 ./build/qemu-system-loongarch64 -machine virt -m 4G -cpu Loongson-3A5000 
-smp 1 -kernel build/tests/tcg/loongarch64-softmmu/hello -monitor none -display 
none -chardev file,path=hello.out,id=output -serial chardev:output
 
+- Linux-user emulation
+
+  We already support Linux user emulation. We can use LoongArch cross-tools to 
build LoongArch executables on x86 machines,
+  and we can also use qemu-loongarch64 to run LoongArch executables.
+
+  1. Config cross-tools env.
+
+ see System emulation.
+
+  2. Test tests/tcg/multiarch.
+
+ ./configure  --static  --prefix=/usr  --disable-werror 
--target-list="loongarch64-linux-user" --enable-debug
+
+ cd build
+
+ make && make check-tcg
+
+  3. Run LoongArch system basic commands with loongarch-clfs-system.
+
+ - Config clfs env.
+
+   wget 
https://github.com/loongson/build-tools/releases/download/2022.05.29/loongarch64-clfs-system-5.0.tar.bz2
+
+   tar -vxf loongarch64-clfs-system-5.0.tar.bz2 -C /opt/clfs
+
+   cp /opt/clfs/lib64/ld-linux-loongarch-lp64d.so.1  /lib64
+
+   export LD_LIBRARY_PATH="/opt/clfs/lib64"
+
+ - Run LoongArch system basic commands.
+
+   ./qemu-loongarch64  /opt/clfs/usr/bin/bash
+   ./qemu-loongarch64  /opt/clfs/usr/bin/ls
+   ./qemu-loongarch64  /opt/clfs/usr/bin/pwd
+
 - Note.
   We can get the latest LoongArch documents or LoongArch tools at 
https://github.com/loongson/
-- 
2.31.1




  1   2   3   >