date:20240704

Re: [PATCH 27/43] target/ppc/mmu_common.c: Remove mmu_ctx_t

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Completely get rid of mmu_ctx_t after converting the remaining
> functions to pass raddr and prot without the context struct.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 25 +++--
>  1 file changed, 7 insertions(+), 18 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 4770b43630..60f8736210 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -37,12 +37,6 @@
>  
>  /* #define DUMP_PAGE_TABLES */
>  
> -/* Context used internally during MMU translations */
> -typedef struct {
> -hwaddr raddr;  /* Real address */
> -int prot;  /* Protection bits  */
> -} mmu_ctx_t;
> -
>  void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
>  {
>  PowerPCCPU *cpu = env_archcpu(env);
> @@ -264,8 +258,8 @@ static int get_bat_6xx_tlb(CPUPPCState *env, hwaddr 
> *raddr, int *prot,
>  return ret;
>  }
>  
> -static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
> -   target_ulong eaddr,
> +static int mmu6xx_get_physical_address(CPUPPCState *env, hwaddr *raddr,
> +   int *prot, target_ulong eaddr,
> hwaddr *hashp, bool *keyp,
> MMUAccessType access_type, int type)
>  {
> @@ -277,8 +271,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  
>  /* First try to find a BAT entry if there are any */
>  if (env->nb_BATs &&
> -get_bat_6xx_tlb(env, &ctx->raddr, &ctx->prot, eaddr,
> -access_type, pr) == 0) {
> +get_bat_6xx_tlb(env, raddr, prot, eaddr, access_type, pr) == 0) {
>  return 0;
>  }
>  
> @@ -316,7 +309,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  *hashp = hash;
>  
>  /* Software TLB search */
> -return ppc6xx_tlb_check(env, &ctx->raddr, &ctx->prot, eaddr,
> +return ppc6xx_tlb_check(env, raddr, prot, eaddr,
>  access_type, ptem, key, nx);
>  }
>  
> @@ -333,7 +326,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>   * Should make the instruction do no-op.  As it already do
>   * no-op, it's quite easy :-)
>   */
> -ctx->raddr = eaddr;
> +*raddr = eaddr;
>  return 0;
>  case ACCESS_CODE: /* No code fetch is allowed in direct-store areas */
>  case ACCESS_FLOAT: /* Floating point load/store */
> @@ -343,7 +336,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  }
>  if ((access_type == MMU_DATA_STORE || !key) &&
>  (access_type == MMU_DATA_LOAD || key)) {
> -ctx->raddr = eaddr;
> +*raddr = eaddr;
>  return 2;
>  }
>  return -2;
> @@ -681,7 +674,6 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  {
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
> -mmu_ctx_t ctx;
>  hwaddr hash = 0; /* init to 0 to avoid used uninit warning */
>  bool key;
>  int type, ret;
> @@ -700,12 +692,9 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  type = ACCESS_INT;
>  }
>  
> -ctx.prot = 0;
> -ret = mmu6xx_get_physical_address(env, &ctx, eaddr, &hash, &key,
> +ret = mmu6xx_get_physical_address(env, raddrp, protp, eaddr, &hash, &key,
>access_type, type);
>  if (ret == 0) {
> -*raddrp = ctx.raddr;
> -*protp = ctx.prot;
>  *psizep = TARGET_PAGE_BITS;
>  return true;
>  } else if (!guest_visible) {

Re: [PATCH 26/43] target/ppc/mmu_common.c: Stop using ctx in get_bat_6xx_tlb()

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Pass raddr and prot in function parameters instead

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 17 +
>  1 file changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 624ed51a92..4770b43630 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -193,7 +193,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr 
> *raddr, int *prot,
>  return ret;
>  }
>  
> -static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
> +static int get_bat_6xx_tlb(CPUPPCState *env, hwaddr *raddr, int *prot,
> target_ulong eaddr, MMUAccessType access_type,
> bool pr)
>  {
> @@ -224,16 +224,16 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  if ((eaddr & BATU32_BEPIU) == BEPIu &&
>  ((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) {
>  /* Get physical address */
> -ctx->raddr = (*BATl & BATU32_BEPIU) |
> +*raddr = (*BATl & BATU32_BEPIU) |
>  ((eaddr & BATU32_BEPIL & bl) | (*BATl & BATU32_BEPIL)) |
>  (eaddr & 0x0001F000);
>  /* Compute access rights */
> -ctx->prot = ppc_hash32_bat_prot(*BATu, *BATl);
> -if (check_prot_access_type(ctx->prot, access_type)) {
> +*prot = ppc_hash32_bat_prot(*BATu, *BATl);
> +if (check_prot_access_type(*prot, access_type)) {
>  qemu_log_mask(CPU_LOG_MMU, "BAT %d match: r " 
> HWADDR_FMT_plx
> -  " prot=%c%c\n", i, ctx->raddr,
> -  ctx->prot & PAGE_READ ? 'R' : '-',
> -  ctx->prot & PAGE_WRITE ? 'W' : '-');
> +  " prot=%c%c\n", i, *raddr,
> +  *prot & PAGE_READ ? 'R' : '-',
> +  *prot & PAGE_WRITE ? 'W' : '-');
>  ret = 0;
>  } else {
>  ret = -2;
> @@ -277,7 +277,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  
>  /* First try to find a BAT entry if there are any */
>  if (env->nb_BATs &&
> -get_bat_6xx_tlb(env, ctx, eaddr, access_type, pr) == 0) {
> +get_bat_6xx_tlb(env, &ctx->raddr, &ctx->prot, eaddr,
> +access_type, pr) == 0) {
>  return 0;
>  }
>

Re: [PATCH 28/43] target/ppc/mmu-hash32.c: Inline and remove ppc_hash32_pte_raddr()

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function is used only once and does not add more clarity than
> doing it inline.
>
> Signed-off-by: BALATON Zoltan 

Ah, not really sure I agree. Yes I suppose in this case because it
has that comment. But you could instead remove the comment and
leave the function there (because the comment is redundant with
the function name), and then your main function is 1 line
instead of 4.

Don't remove functions just because they're called once, if they
are a nice self-contained and well named thing. But okay for here
I suppose.

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu-hash32.c | 18 +-
>  1 file changed, 5 insertions(+), 13 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 6f0f0bbb00..c4de1647e2 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -298,15 +298,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>  return pte_offset;
>  }
>  
> -static hwaddr ppc_hash32_pte_raddr(target_ulong sr, ppc_hash_pte32_t pte,
> -   target_ulong eaddr)
> -{
> -hwaddr rpn = pte.pte1 & HPTE32_R_RPN;
> -hwaddr mask = ~TARGET_PAGE_MASK;
> -
> -return (rpn & ~mask) | (eaddr & mask);
> -}
> -
>  bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType 
> access_type,
>hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
>bool guest_visible)
> @@ -440,11 +431,12 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>   */
>  prot &= ~PAGE_WRITE;
>  }
> - }
> +}
> +*protp = prot;
>  
>  /* 9. Determine the real address from the PTE */
> -
> -*raddrp = ppc_hash32_pte_raddr(sr, pte, eaddr);
> -*protp = prot;
> +*raddrp = pte.pte1 & HPTE32_R_RPN;
> +*raddrp &= TARGET_PAGE_MASK;
> +*raddrp |= eaddr & ~TARGET_PAGE_MASK;
>  return true;
>  }

Re: [PATCH 29/43] target/ppc/mmu-hash32.c: Move get_pteg_offset32() to the header

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function is a simple shared function, move it to other similar
> static inline functions in the header.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 7 ---
>  target/ppc/mmu-hash32.h | 6 +-
>  2 files changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index c4de1647e2..44b16142ab 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -201,13 +201,6 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, 
> target_ulong sr,
>  return false;
>  }
>  
> -hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
> -{
> -target_ulong mask = ppc_hash32_hpt_mask(cpu);
> -
> -return (hash * HASH_PTEG_SIZE_32) & mask;
> -}
> -
>  static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
>   bool secondary, target_ulong ptem,
>   ppc_hash_pte32_t *pte)
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index bd75f7d647..2838de031c 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -3,7 +3,6 @@
>  
>  #ifndef CONFIG_USER_ONLY
>  
> -hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash);
>  bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType 
> access_type,
>hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
>bool guest_visible);
> @@ -102,6 +101,11 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU 
> *cpu,
>  stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
>  }
>  
> +static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
> +{
> +return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
> +}
> +
>  static inline bool ppc_hash32_key(bool pr, target_ulong sr)
>  {
>  return pr ? (sr & SR32_KP) : (sr & SR32_KS);

Re: [PATCH 30/43] target/ppc: Unexport some functions from mmu-book3s-v3.h

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> The ppc_hash64_hpt_base() and ppc_hash64_hpt_mask() functions are
> mostly used by mmu-hash64.c only but there is one call to
> ppc_hash64_hpt_mask() in hw/ppc/spapr_vhyp_mmu.c.in a helper function
> that can be moved to mmu-hash64.c which allows these functions to be
> removed from the header.
>

Fine. Probably too big to inline anyway.

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  hw/ppc/spapr_vhyp_mmu.c| 21 
>  target/ppc/mmu-book3s-v3.h | 40 ---
>  target/ppc/mmu-hash64.c| 49 ++
>  target/ppc/mmu-hash64.h|  1 +
>  4 files changed, 54 insertions(+), 57 deletions(-)
>
> diff --git a/hw/ppc/spapr_vhyp_mmu.c b/hw/ppc/spapr_vhyp_mmu.c
> index b3dd8b3a59..2d41d7f77b 100644
> --- a/hw/ppc/spapr_vhyp_mmu.c
> +++ b/hw/ppc/spapr_vhyp_mmu.c
> @@ -15,19 +15,6 @@
>  #include "helper_regs.h"
>  #include "hw/ppc/spapr.h"
>  #include "mmu-hash64.h"
> -#include "mmu-book3s-v3.h"
> -
> -
> -static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
> -{
> -/*
> - * hash value/pteg group index is normalized by HPT mask
> - */
> -if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
> -return false;
> -}
> -return true;
> -}
>  
>  static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
>  target_ulong opcode, target_ulong *args)
> @@ -70,7 +57,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, 
> SpaprMachineState *spapr,
>  
>  pteh &= ~0x60ULL;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return H_PARAMETER;
>  }
>  
> @@ -119,7 +106,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu
>  const ppc_hash_pte64_t *hptes;
>  target_ulong v, r;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return REMOVE_PARM;
>  }
>  
> @@ -250,7 +237,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, 
> SpaprMachineState *spapr,
>  const ppc_hash_pte64_t *hptes;
>  target_ulong v, r;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return H_PARAMETER;
>  }
>  
> @@ -287,7 +274,7 @@ static target_ulong h_read(PowerPCCPU *cpu, 
> SpaprMachineState *spapr,
>  int i, ridx, n_entries = 1;
>  const ppc_hash_pte64_t *hptes;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return H_PARAMETER;
>  }
>  
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index f3f7993958..263ce55c1f 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -83,46 +83,6 @@ static inline bool ppc64_v3_radix(PowerPCCPU *cpu)
>  return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR);
>  }
>  
> -static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
> -{
> -uint64_t base;
> -
> -if (cpu->vhyp) {
> -return 0;
> -}
> -if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> -ppc_v3_pate_t pate;
> -
> -if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
> -return 0;
> -}
> -base = pate.dw0;
> -} else {
> -base = cpu->env.spr[SPR_SDR1];
> -}
> -return base & SDR_64_HTABORG;
> -}
> -
> -static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
> -{
> -uint64_t base;
> -
> -if (cpu->vhyp) {
> -return cpu->vhyp_class->hpt_mask(cpu->vhyp);
> -}
> -if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> -ppc_v3_pate_t pate;
> -
> -if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
> -return 0;
> -}
> -base = pate.dw0;
> -} else {
> -base = cpu->env.spr[SPR_SDR1];
> -}
> -return (1ULL << ((base & SDR_64_HTABSIZE) + 18 - 7)) - 1;
> -}
> -
>  #endif /* TARGET_PPC64 */
>  
>  #endif /* CONFIG_USER_ONLY */
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index cbc8efa0c3..7bc0323f26 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -508,6 +508,46 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, 
> ppc_hash_pte64_t pte)
>  return prot;
>  }
>  
> +static hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
> +{
> +uint64_t base;
> +
> +if (cpu->vhyp) {
> +return 0;
> +}
> +if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> +ppc_v3_pate_t pate;
> +
> +if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
> +return 0;
> +}
> +base = pate.dw0;
> +} else {
> +base = cpu->env.spr[SPR_SDR1];
> +}
> +return base & SDR_64_HTABORG;
> +}
> +
> +static hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
> +{
> +uint64_t base;
> +
> +if (cpu->vhyp) {
> +return cpu->vhyp_class->hpt_mask(cpu->vhyp);
> +}
> +

Re: [PATCH 32/43] target/ppc: Remove includes from mmu-book3s-v3.h

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Drop includes from header that is not needed by the header itself and
> only include them from C files that really need it.

Acked-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-book3s-v3.h | 3 ---
>  target/ppc/mmu-hash64.c| 1 +
>  target/ppc/mmu-radix64.c   | 1 +
>  3 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index 263ce55c1f..be66e26604 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -20,9 +20,6 @@
>  #ifndef PPC_MMU_BOOK3S_V3_H
>  #define PPC_MMU_BOOK3S_V3_H
>  
> -#include "mmu-hash64.h"
> -#include "mmu-books.h"
> -
>  #ifndef CONFIG_USER_ONLY
>  
>  /*
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index 7bc0323f26..5e1983e334 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -31,6 +31,7 @@
>  #include "hw/hw.h"
>  #include "internal.h"
>  #include "mmu-book3s-v3.h"
> +#include "mmu-books.h"
>  #include "helper_regs.h"
>  
>  #ifdef CONFIG_TCG
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index cf9619e847..be7a45f254 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -28,6 +28,7 @@
>  #include "internal.h"
>  #include "mmu-radix64.h"
>  #include "mmu-book3s-v3.h"
> +#include "mmu-books.h"
>  
>  /* Radix Partition Table Entry Fields */
>  #define PATE1_R_PRTB   0x0000

Re: [PATCH 31/43] target/ppc/mmu-radix64: Remove externally unused parts from header

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Move the parts not needed outside of mmu-radix64.c from the header to
> the C file to leave only parts in the header that need to be exported.
> Also drop unneded include of this header.
>
> Signed-off-by: BALATON Zoltan 

Acked-by: Nicholas Piggin 

> ---
>  target/ppc/mmu-book3s-v3.c |  1 -
>  target/ppc/mmu-radix64.c   | 49 +++
>  target/ppc/mmu-radix64.h   | 53 +-
>  3 files changed, 50 insertions(+), 53 deletions(-)
>
> diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c
> index c8f69b3df9..a812cb5113 100644
> --- a/target/ppc/mmu-book3s-v3.c
> +++ b/target/ppc/mmu-book3s-v3.c
> @@ -21,7 +21,6 @@
>  #include "cpu.h"
>  #include "mmu-hash64.h"
>  #include "mmu-book3s-v3.h"
> -#include "mmu-radix64.h"
>  
>  bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t 
> *entry)
>  {
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 5a02e4963b..cf9619e847 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -29,6 +29,37 @@
>  #include "mmu-radix64.h"
>  #include "mmu-book3s-v3.h"
>  
> +/* Radix Partition Table Entry Fields */
> +#define PATE1_R_PRTB   0x0000
> +#define PATE1_R_PRTS   0x001F
> +
> +/* Radix Process Table Entry Fields */
> +#define PRTBE_R_GET_RTS(rts) \
> +rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31)
> +#define PRTBE_R_RPDB0x0F00
> +#define PRTBE_R_RPDS0x001F
> +
> +/* Radix Page Directory/Table Entry Fields */
> +#define R_PTE_VALID 0x8000
> +#define R_PTE_LEAF  0x4000
> +#define R_PTE_SW0   0x2000
> +#define R_PTE_RPN   0x01FFF000
> +#define R_PTE_SW1   0x0E00
> +#define R_GET_SW(sw)(((sw >> 58) & 0x8) | ((sw >> 9) & 0x7))
> +#define R_PTE_R 0x0100
> +#define R_PTE_C 0x0080
> +#define R_PTE_ATT   0x0030
> +#define R_PTE_ATT_NORMAL0x
> +#define R_PTE_ATT_SAO   0x0010
> +#define R_PTE_ATT_NI_IO 0x0020
> +#define R_PTE_ATT_TOLERANT_IO   0x0030
> +#define R_PTE_EAA_PRIV  0x0008
> +#define R_PTE_EAA_R 0x0004
> +#define R_PTE_EAA_RW0x0002
> +#define R_PTE_EAA_X 0x0001
> +#define R_PDE_NLB   PRTBE_R_RPDB
> +#define R_PDE_NLS   PRTBE_R_RPDS
> +
>  static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
>   vaddr eaddr,
>   uint64_t *lpid, uint64_t 
> *pid)
> @@ -180,6 +211,24 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, 
> MMUAccessType access_type,
>  }
>  }
>  
> +static int ppc_radix64_get_prot_eaa(uint64_t pte)
> +{
> +return (pte & R_PTE_EAA_R ? PAGE_READ : 0) |
> +   (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) |
> +   (pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
> +}
> +
> +static int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu)
> +{
> +const CPUPPCState *env = &cpu->env;
> +int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
> +int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 
> IAMR63:62 */
> +
> +return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */
> +   (amr & 0x1 ? 0 : PAGE_READ) |
> +   (iamr & 0x1 ? 0 : PAGE_EXEC);
> +}
> +
>  static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType 
> access_type,
> uint64_t pte, int *fault_cause, int *prot,
> int mmu_idx, bool partition_scoped)
> diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h
> index c5c04a1527..6620b3d648 100644
> --- a/target/ppc/mmu-radix64.h
> +++ b/target/ppc/mmu-radix64.h
> @@ -3,7 +3,7 @@
>  
>  #ifndef CONFIG_USER_ONLY
>  
> -#include "exec/page-protection.h"
> +#ifdef TARGET_PPC64
>  
>  /* Radix Quadrants */
>  #define R_EADDR_MASK0x3FFF
> @@ -14,61 +14,10 @@
>  #define R_EADDR_QUADRANT2   0x8000
>  #define R_EADDR_QUADRANT3   0xC000
>  
> -/* Radix Partition Table Entry Fields */
> -#define PATE1_R_PRTB   0x0000
> -#define PATE1_R_PRTS   0x001F
> -
> -/* Radix Process Table Entry Fields */
> -#define PRTBE_R_GET_RTS(rts) \
> -rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31)
> -#define PRTBE_R_RPDB0x0F00
> -#define PRTBE_R_RPDS0x001F
> -
> -/* Radix Page Directory/Table Entry Fields */
> -#define R_PTE_VALID 0x8000
> -#define R

Re: [PATCH 33/43] target/ppc: Remove single use static inline function

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> The ger_pack_masks() function is only used once and the inverse of
> this operation is already inlined so it can be inlined too in the only
> caller and removed from the header.

Is this needed for later patches? I might prefer to keep it, even
move it into vsx-impl.c.inc and pull its inverse out into its own
function too even.

Thanks,
Nick

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/internal.h   | 9 -
>  target/ppc/translate/vsx-impl.c.inc | 6 --
>  2 files changed, 4 insertions(+), 11 deletions(-)
>
> diff --git a/target/ppc/internal.h b/target/ppc/internal.h
> index 20fb2ec593..8e5a241f74 100644
> --- a/target/ppc/internal.h
> +++ b/target/ppc/internal.h
> @@ -293,13 +293,4 @@ FIELD(GER_MSK, XMSK, 0, 4)
>  FIELD(GER_MSK, YMSK, 4, 4)
>  FIELD(GER_MSK, PMSK, 8, 8)
>  
> -static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
> -{
> -int msk = 0;
> -msk = FIELD_DP32(msk, GER_MSK, XMSK, xmsk);
> -msk = FIELD_DP32(msk, GER_MSK, YMSK, ymsk);
> -msk = FIELD_DP32(msk, GER_MSK, PMSK, pmsk);
> -return msk;
> -}
> -
>  #endif /* PPC_INTERNAL_H */
> diff --git a/target/ppc/translate/vsx-impl.c.inc 
> b/target/ppc/translate/vsx-impl.c.inc
> index 0266f09119..62950d348a 100644
> --- a/target/ppc/translate/vsx-impl.c.inc
> +++ b/target/ppc/translate/vsx-impl.c.inc
> @@ -2819,7 +2819,7 @@ static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a 
> *a)
>  static bool do_ger(DisasContext *ctx, arg_MMIRR_XX3 *a,
>  void (*helper)(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32))
>  {
> -uint32_t mask;
> +uint32_t mask = 0;
>  TCGv_ptr xt, xa, xb;
>  REQUIRE_INSNS_FLAGS2(ctx, ISA310);
>  REQUIRE_VSX(ctx);
> @@ -2832,7 +2832,9 @@ static bool do_ger(DisasContext *ctx, arg_MMIRR_XX3 *a,
>  xa = gen_vsr_ptr(a->xa);
>  xb = gen_vsr_ptr(a->xb);
>  
> -mask = ger_pack_masks(a->pmsk, a->ymsk, a->xmsk);
> +mask = FIELD_DP32(mask, GER_MSK, XMSK, a->xmsk);
> +mask = FIELD_DP32(mask, GER_MSK, YMSK, a->ymsk);
> +mask = FIELD_DP32(mask, GER_MSK, PMSK, a->pmsk);
>  helper(tcg_env, xa, xb, xt, tcg_constant_i32(mask));
>  return true;
>  }

Re: [PATCH 35/43] target/ppc/mmu-hash32.c: Change parameter type of ppc_hash32_bat_lookup()

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function takes PowerPCCPU but only needs the env from it. Change
> its parameter to CPUPPCState *env.
>

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 44b16142ab..a2c0ac05d2 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -48,11 +48,10 @@ static target_ulong hash32_bat_size(int mmu_idx,
>  return BATU32_BEPI & ~((batu & BATU32_BL) << 15);
>  }
>  
> -static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
> +static hwaddr ppc_hash32_bat_lookup(CPUPPCState *env, target_ulong ea,
>  MMUAccessType access_type, int *prot,
>  int mmu_idx)
>  {
> -CPUPPCState *env = &cpu->env;
>  target_ulong *BATlt, *BATut;
>  bool ifetch = access_type == MMU_INST_FETCH;
>  int i;
> @@ -316,7 +315,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  
>  /* 2. Check Block Address Translation entries (BATs) */
>  if (env->nb_BATs != 0) {
> -raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, protp, 
> mmu_idx);
> +raddr = ppc_hash32_bat_lookup(env, eaddr, access_type, protp, 
> mmu_idx);
>  if (raddr != -1) {
>  if (!check_prot_access_type(*protp, access_type)) {
>  if (guest_visible) {

Re: [PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Two of these are not used anywhere and the other two are used only
> once and can be inlined and removed from the header.

I'd prefer to put these in the .c file. Probably calculating the
base once would generate marginally better code since it would not
have to keep reloading it (since there is a barrier there it can't
cache the value).

Thanks,
Nick

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c |  5 +++--
>  target/ppc/mmu-hash32.h | 32 
>  2 files changed, 3 insertions(+), 34 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index a2c0ac05d2..7a6a674f8a 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, 
> hwaddr pteg_off,
>  {
>  hwaddr pte_offset = pteg_off;
>  target_ulong pte0, pte1;
> +hwaddr base = ppc_hash32_hpt_base(cpu);
>  int i;
>  
>  for (i = 0; i < HPTES_PER_GROUP; i++) {
> -pte0 = ppc_hash32_load_hpte0(cpu, pte_offset);
> +pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset);
>  /*
>   * pte0 contains the valid bit and must be read before pte1,
>   * otherwise we might see an old pte1 with a new valid bit and
>   * thus an inconsistent hpte value
>   */
>  smp_rmb();
> -pte1 = ppc_hash32_load_hpte1(cpu, pte_offset);
> +pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 
> 2);
>  
>  if ((pte0 & HPTE32_V_VALID)
>  && (secondary == !!(pte0 & HPTE32_V_SECONDARY))
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index 2838de031c..4db55fb0a0 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
>  return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0x;
>  }
>  
> -static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
> - hwaddr pte_offset)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -return ldl_phys(CPU(cpu)->as, base + pte_offset);
> -}
> -
> -static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
> - hwaddr pte_offset)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
> -}
> -
> -static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
> -  hwaddr pte_offset, target_ulong 
> pte0)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
> -}
> -
> -static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
> -  hwaddr pte_offset, target_ulong 
> pte1)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
> -}
> -
>  static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
>  {
>  return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);

Re: [PATCH 37/43] target/ppc/mmu-hash32.c: Return and use pte address instead of base + offset

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Change ppc_hash32_pteg_search() to return pte address instead of an
> offset to avoid needing to get the base and add offset to it when we
> already have the address we need.

I think this looks good, but would need small rebase if the previous
patch is changed.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 51 -
>  1 file changed, 20 insertions(+), 31 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 7a6a674f8a..cc1e790d0e 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -204,58 +204,48 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, 
> hwaddr pteg_off,
>   bool secondary, target_ulong ptem,
>   ppc_hash_pte32_t *pte)
>  {
> -hwaddr pte_offset = pteg_off;
> +hwaddr pte_addr = ppc_hash32_hpt_base(cpu) + pteg_off;
>  target_ulong pte0, pte1;
> -hwaddr base = ppc_hash32_hpt_base(cpu);
>  int i;
>  
> -for (i = 0; i < HPTES_PER_GROUP; i++) {
> -pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset);
> +for (i = 0; i < HPTES_PER_GROUP; i++, pte_addr += HASH_PTE_SIZE_32) {
> +pte0 = ldl_phys(CPU(cpu)->as, pte_addr);
>  /*
>   * pte0 contains the valid bit and must be read before pte1,
>   * otherwise we might see an old pte1 with a new valid bit and
>   * thus an inconsistent hpte value
>   */
>  smp_rmb();
> -pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 
> 2);
> +pte1 = ldl_phys(CPU(cpu)->as, pte_addr + HASH_PTE_SIZE_32 / 2);
>  
>  if ((pte0 & HPTE32_V_VALID)
>  && (secondary == !!(pte0 & HPTE32_V_SECONDARY))
>  && HPTE32_V_COMPARE(pte0, ptem)) {
>  pte->pte0 = pte0;
>  pte->pte1 = pte1;
> -return pte_offset;
> +return pte_addr;
>  }
> -
> -pte_offset += HASH_PTE_SIZE_32;
>  }
> -
>  return -1;
>  }
>  
> -static void ppc_hash32_set_r(PowerPCCPU *cpu, hwaddr pte_offset, uint32_t 
> pte1)
> +static void ppc_hash32_set_r(PowerPCCPU *cpu, hwaddr pte_addr, uint32_t pte1)
>  {
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -hwaddr offset = pte_offset + 6;
> -
>  /* The HW performs a non-atomic byte update */
> -stb_phys(CPU(cpu)->as, base + offset, ((pte1 >> 8) & 0xff) | 0x01);
> +stb_phys(CPU(cpu)->as, pte_addr + 6, ((pte1 >> 8) & 0xff) | 0x01);
>  }
>  
> -static void ppc_hash32_set_c(PowerPCCPU *cpu, hwaddr pte_offset, uint64_t 
> pte1)
> +static void ppc_hash32_set_c(PowerPCCPU *cpu, hwaddr pte_addr, uint64_t pte1)
>  {
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -hwaddr offset = pte_offset + 7;
> -
>  /* The HW performs a non-atomic byte update */
> -stb_phys(CPU(cpu)->as, base + offset, (pte1 & 0xff) | 0x80);
> +stb_phys(CPU(cpu)->as, pte_addr + 7, (pte1 & 0xff) | 0x80);
>  }
>  
>  static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>   target_ulong sr, target_ulong eaddr,
>   ppc_hash_pte32_t *pte)
>  {
> -hwaddr pteg_off, pte_offset;
> +hwaddr pteg_off, pte_addr;
>  hwaddr hash;
>  uint32_t vsid, pgidx, ptem;
>  
> @@ -277,18 +267,18 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>  ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu),
>  vsid, ptem, hash);
>  pteg_off = get_pteg_offset32(cpu, hash);
> -pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte);
> -if (pte_offset == -1) {
> +pte_addr = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte);
> +if (pte_addr == -1) {
>  /* Secondary PTEG lookup */
>  qemu_log_mask(CPU_LOG_MMU, "1 htab=" HWADDR_FMT_plx "/" 
> HWADDR_FMT_plx
>  " vsid=%" PRIx32 " api=%" PRIx32
>  " hash=" HWADDR_FMT_plx "\n", ppc_hash32_hpt_base(cpu),
>  ppc_hash32_hpt_mask(cpu), vsid, ptem, ~hash);
>  pteg_off = get_pteg_offset32(cpu, ~hash);
> -pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte);
> +pte_addr = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte);
>  }
>  
> -return pte_offset;
> +return pte_addr;
>  }
>  
>  bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType 
> access_type,
> @@ -298,7 +288,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
>  target_ulong sr;
> -hwaddr pte_offset, raddr;
> +hwaddr pte_addr, raddr;
>  ppc_hash_pte32_t pte;
>  bool key;
>  int prot;
> @@ -360,8 +350,8 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  }
>  
>  /* 6.

Re: [PATCH 39/43] target/ppc: Change parameter type of some inline functions

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> These functions take PowerPCCPU but only need the env from it. Change
> their parameter to CPUPPCState *env.

I suppose that's okay. Probably generates a little better code.

Acked-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 13 +++--
>  target/ppc/mmu-hash32.h | 12 ++--
>  target/ppc/mmu_common.c | 20 +---
>  3 files changed, 22 insertions(+), 23 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 6d0adf3357..f18faf0f46 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -244,10 +244,11 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>   target_ulong sr, target_ulong eaddr,
>   ppc_hash_pte32_t *pte)
>  {
> +CPUPPCState *env = &cpu->env;
>  hwaddr hpt_base, pteg_off, pte_addr, hash;
>  uint32_t vsid, pgidx, ptem;
>  
> -hpt_base = ppc_hash32_hpt_base(cpu);
> +hpt_base = ppc_hash32_hpt_base(env);
>  vsid = sr & SR32_VSID;
>  pgidx = (eaddr & ~SEGMENT_MASK_256M) >> TARGET_PAGE_BITS;
>  hash = vsid ^ pgidx;
> @@ -256,21 +257,21 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>  /* Page address translation */
>  qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx " htab_mask "
>HWADDR_FMT_plx " hash " HWADDR_FMT_plx "\n",
> -  hpt_base, ppc_hash32_hpt_mask(cpu), hash);
> +  hpt_base, ppc_hash32_hpt_mask(env), hash);
>  
>  /* Primary PTEG lookup */
>  qemu_log_mask(CPU_LOG_MMU, "0 htab=" HWADDR_FMT_plx "/" HWADDR_FMT_plx
>" vsid=%" PRIx32 " ptem=%" PRIx32 " hash=" HWADDR_FMT_plx
> -  "\n", hpt_base, ppc_hash32_hpt_mask(cpu), vsid, ptem, 
> hash);
> -pteg_off = get_pteg_offset32(cpu, hash);
> +  "\n", hpt_base, ppc_hash32_hpt_mask(env), vsid, ptem, 
> hash);
> +pteg_off = get_pteg_offset32(env, hash);
>  pte_addr = ppc_hash32_pteg_search(cpu, hpt_base + pteg_off, 0, ptem, 
> pte);
>  if (pte_addr == -1) {
>  /* Secondary PTEG lookup */
>  qemu_log_mask(CPU_LOG_MMU, "1 htab=" HWADDR_FMT_plx "/" 
> HWADDR_FMT_plx
>" vsid=%" PRIx32 " api=%" PRIx32 " hash=" 
> HWADDR_FMT_plx
> -  "\n", hpt_base, ppc_hash32_hpt_mask(cpu), vsid, ptem,
> +  "\n", hpt_base, ppc_hash32_hpt_mask(env), vsid, ptem,
>~hash);
> -pteg_off = get_pteg_offset32(cpu, ~hash);
> +pteg_off = get_pteg_offset32(env, ~hash);
>  pte_addr = ppc_hash32_pteg_search(cpu, hpt_base + pteg_off, 1, ptem,
>pte);
>  }
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index 4db55fb0a0..ec8d881def 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -59,19 +59,19 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  #define HPTE32_R_WIMG   0x0078
>  #define HPTE32_R_PP 0x0003
>  
> -static inline hwaddr ppc_hash32_hpt_base(PowerPCCPU *cpu)
> +static inline hwaddr ppc_hash32_hpt_base(CPUPPCState *env)
>  {
> -return cpu->env.spr[SPR_SDR1] & SDR_32_HTABORG;
> +return env->spr[SPR_SDR1] & SDR_32_HTABORG;
>  }
>  
> -static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
> +static inline hwaddr ppc_hash32_hpt_mask(CPUPPCState *env)
>  {
> -return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0x;
> +return ((env->spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0x;
>  }
>  
> -static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
> +static inline hwaddr get_pteg_offset32(CPUPPCState *env, hwaddr hash)
>  {
> -return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
> +return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(env);
>  }
>  
>  static inline bool ppc_hash32_key(bool pr, target_ulong sr)
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 60f8736210..b45eb64f6e 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -166,8 +166,8 @@ static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr 
> *raddr, int *prot,
>  #if defined(DUMP_PAGE_TABLES)
>  if (qemu_loglevel_mask(CPU_LOG_MMU)) {
>  CPUState *cs = env_cpu(env);
> -hwaddr base = ppc_hash32_hpt_base(env_archcpu(env));
> -hwaddr len = ppc_hash32_hpt_mask(env_archcpu(env)) + 0x80;
> +hwaddr base = ppc_hash32_hpt_base(env);
> +hwaddr len = ppc_hash32_hpt_mask(env) + 0x80;
>  uint32_t a0, a1, a2, a3;
>  
>  qemu_log("Page table: " HWADDR_FMT_plx " len " HWADDR_FMT_plx "\n",
> @@ -263,7 +263,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> hwaddr *raddr,
> hwaddr *hashp, bool *keyp

Re: [PATCH 40/43] target/ppc: Change parameter type of ppc64_v3_radix()

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function takes PowerPCCPU but only needs the env from it. Change
> its parameter to CPUPPCState *env.

Acked-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  hw/ppc/spapr_rtas.c| 2 +-
>  target/ppc/mmu-book3s-v3.h | 4 ++--
>  target/ppc/mmu_common.c| 4 ++--
>  3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index f329693c55..38e94fc0d7 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -177,7 +177,7 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, 
> SpaprMachineState *spapr,
>   * New cpus are expected to start in the same radix/hash mode
>   * as the existing CPUs
>   */
> -if (ppc64_v3_radix(callcpu)) {
> +if (ppc64_v3_radix(&callcpu->env)) {
>  lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
>  } else {
>  lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR);
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index be66e26604..e52129ff7f 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -75,9 +75,9 @@ bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid,
>   * dig out the partition table in the fast path. This is
>   * also how the HW uses it.
>   */
> -static inline bool ppc64_v3_radix(PowerPCCPU *cpu)
> +static inline bool ppc64_v3_radix(CPUPPCState *env)
>  {
> -return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR);
> +return !!(env->spr[SPR_LPCR] & LPCR_HR);
>  }
>  
>  #endif /* TARGET_PPC64 */
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index b45eb64f6e..ab055ca96b 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -565,7 +565,7 @@ void dump_mmu(CPUPPCState *env)
>  dump_slb(env_archcpu(env));
>  break;
>  case POWERPC_MMU_3_00:
> -if (ppc64_v3_radix(env_archcpu(env))) {
> +if (ppc64_v3_radix(env)) {
>  qemu_log_mask(LOG_UNIMP, "%s: the PPC64 MMU is unsupported\n",
>__func__);
>  } else {
> @@ -810,7 +810,7 @@ bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  switch (cpu->env.mmu_model) {
>  #if defined(TARGET_PPC64)
>  case POWERPC_MMU_3_00:
> -if (ppc64_v3_radix(cpu)) {
> +if (ppc64_v3_radix(&cpu->env)) {
>  return ppc_radix64_xlate(cpu, eaddr, access_type, raddrp,
>   psizep, protp, mmu_idx, guest_visible);
>  }

Re: [PATCH v3] virtio: Implement Virtio Backend for SD/MMC in QEMU

2024-07-04 Thread Mikhail Krasheninnikov

On Wed, 3 Jul 2024, Michael S. Tsirkin wrote:

> On Wed, Jul 03, 2024 at 10:55:17PM +0300, Mikhail Krasheninnikov wrote:
> > 
> > Hello, Alex!
> > 
> > No, there's no patch to the VirtIO specification yet. This is 
> > proof-of-concept solution since I'm not sure that I did everything 
> > correct with the design (and as folks' reviews show, for a good reason). 
> > As soon as most obvious issues would be out of the way, I think I'll 
> > submit a patch.
> 
> 
> Mikhail, if you want people to review your patches but not merge
> them yet, pls use an RFC tag in the subject to avoid confusion.
> 
> Thanks,
> 
> -- 
> MST
> 
> 

Hello, Michael!

I was planning to submit three patches: to the kernel, emulator and Virtio 
specification around the same time - as soon as the obvious bugs are 
fixed, I'll submit a patch to the specification. I thought it wasn't 
necessary to use the RFC tag in that case, but if you think it is, 
I'll include it with the next version of the patch.

[PATCH v3] memory tier: consolidate the initialization of memory tiers

2024-07-04 Thread Ho-Ren (Jack) Chuang

The current memory tier initialization process is distributed across
two different functions, memory_tier_init() and memory_tier_late_init().
This design is hard to maintain. Thus, this patch is proposed to reduce
the possible code paths by consolidating different
initialization patches into one.

The earlier discussion with Jonathan and Ying is listed here:
https://lore.kernel.org/lkml/20240405150244.4...@huawei.com/

If we want to put these two initializations together, they must be
placed together in the later function. Because only at that time,
the HMAT information will be ready, adist between nodes can be
calculated, and memory tiering can be established based on the adist.
So we position the initialization at memory_tier_init() to the
memory_tier_late_init() call. Moreover, it's natural to keep
memory_tier initialization in drivers at device_initcall() level.

If we simply move the set_node_memory_tier() from memory_tier_init()
to late_initcall(), it will result in HMAT not registering
the mt_adistance_algorithm callback function, because
set_node_memory_tier() is not performed during the memory tiering
initialization phase, leading to a lack of correct default_dram
information.

Therefore, we introduced a nodemask to pass the information of the
default DRAM nodes. The reason for not choosing to reuse
default_dram_type->nodes is that it is not clean enough. So in the end,
we use a __initdata variable, which is a variable that is released once
initialization is complete, including both CPU and memory nodes for HMAT
to iterate through.

This patchset is based on commits  ("memory tier: create
CPUless memory tiers after obtaining HMAT info") and
 ("memory tier: dax/kmem: introduce an abstract layer for
finding, allocating, and putting memory types"):
[0/2] 
https://lkml.kernel.org/r/20240405000707.2670063-1-horenchu...@bytedance.com
[1/2] 
https://lkml.kernel.org/r/20240405000707.2670063-2-horenchu...@bytedance.com
[1/2] 
https://lkml.kernel.org/r/20240405000707.2670063-3-horenchu...@bytedance.com

Signed-off-by: Ho-Ren (Jack) Chuang 
Suggested-by: Jonathan Cameron 
---
- v3:
 * Combine the cover letter and the patch.
 * Remove extern default_dram_type
 * Remove __initdata in a extern variable
 * Roll back coding style, fix comment description
- v2:
 Thanks to Huang, Ying's and Andrew's comments
 * Add a cover letter
 * Add Suggested-by: Jonathan Cameron 
 * Add get/put_online_mems() protection in memory_tier_late_init()
 * If memtype is set, skip initializing its node
 * Remove redundant code/comments or rewrite code in a cleaner manner
 * https://lore.kernel.org/all/20240628060925.303309-1-horen.chu...@linux.dev/
- v1:
 * https://lore.kernel.org/all/20240621044833.3953055-1-horen.chu...@linux.dev/

- Open to discussion:
 * Should we use `continue` or `break` if set_node_memory_tier() fails in
   memory_tier_late_init()? I've changed it to `continue` in v3.
 * Compute the default_dram_nodes mask in hmat when needed? Currently,
   it still remains in memory-tiers.c
 * Instead of using guard(mutex)(), use mutex_lock() as it was to make a
   cleaner diff?

Thanks,
Ho-Ren (Jack) Chuang

 drivers/acpi/numa/hmat.c |  5 +---
 include/linux/memory-tiers.h |  2 ++
 mm/memory-tiers.c| 54 ++--
 3 files changed, 24 insertions(+), 37 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 2c8ccc91ebe6..a2f9e7a4b479 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -940,10 +940,7 @@ static int hmat_set_default_dram_perf(void)
struct memory_target *target;
struct access_coordinate *attrs;
 
-   if (!default_dram_type)
-   return -EIO;
-
-   for_each_node_mask(nid, default_dram_type->nodes) {
+   for_each_node_mask(nid, default_dram_nodes) {
pxm = node_to_pxm(nid);
target = find_mem_target(pxm);
if (!target)
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 0d70788558f4..0dc0cf2863e2 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -38,6 +38,7 @@ struct access_coordinate;
 #ifdef CONFIG_NUMA
 extern bool numa_demotion_enabled;
 extern struct memory_dev_type *default_dram_type;
+extern nodemask_t default_dram_nodes;
 struct memory_dev_type *alloc_memory_type(int adistance);
 void put_memory_type(struct memory_dev_type *memtype);
 void init_node_memory_type(int node, struct memory_dev_type *default_type);
@@ -76,6 +77,7 @@ static inline bool node_is_toptier(int node)
 
 #define numa_demotion_enabled  false
 #define default_dram_type  NULL
+#define default_dram_nodes NODE_MASK_NONE
 /*
  * CONFIG_NUMA implementation returns non NULL error.
  */
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 6632102bd5c9..4775b3a3dabe 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -43,6 +43,7 @@ static LIST_HEAD(memory_tiers);
 static LIST_HEAD(default_memory_typ

Re: [PATCH 41/43] target/ppc: Change MMU xlate functions to take CPUState

2024-07-04 Thread Nicholas Piggin

On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> The callers of xlate functions get CPUState which is then cast to
> PowerPCCPU that is then cast back to CPUState by most xlate functions.
> Avoid this back and forth casting by passing the existing CPUState to
> xlate functions and let them convert it as needed.

I guess. Is this faster?

Thanks,
Nick

Re: [PATCH V13 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code

2024-07-04 Thread Nicholas Piggin

Looks like there is a bit of noise around this recently. Do we
think the hotplug patches can get over the line this time?

If not, perhaps we work with Salil to get this patch 1 upstream
at least.

Thanks,
Nick

On Tue Jun 25, 2024 at 3:08 PM AEST, Harsh Prateek Bora wrote:
> +qemu-devel, qemu-ppc
>
> Ping!
>
> On 6/17/24 15:18, Harsh Prateek Bora wrote:
> > 
> > + MST, Igor - to help with early review/merge. TIA.
> > 
> > On 6/14/24 16:06, Salil Mehta wrote:
> >> Hello
> >>
> >>>   From: Harsh Prateek Bora 
> >>>   Sent: Friday, June 14, 2024 6:24 AM
> >>>   Hi Paolo, Nick,
> >>>   Can this patch 1/8 be merged earlier provided we have got 
> >>> sufficient R-bys
> >>>   for it and the review of entire series may take a longer time?
> >>>   We have some ppc64 patches based on it, hence the ask.
> >>>   Hi Salil,
> >>>   I am hoping we are not expecting anymore changes to this patch, please
> >>>   confirm.
> >>
> >>
> >> I do not expect any change. I had requested Michael to merge the complete
> >> series as it is stranding other users. He then requested Igor to take 
> >> a final look but
> >> he has not reverted yet. I'll remind Michael again. BTW, can you reply 
> >> to below
> >> patch explicitly indicating your interest in the series so that MST 
> >> knows who else
> >> are the stake holders here
> >>
> >> https://lore.kernel.org/qemu-devel/20240605160327.3c71f...@imammedo.users.ipa.redhat.com/
> >>
> >>
> >> Hi Paolo,
> >>
> >> A request, would it be possible to skim through this series from KVM 
> >> perspective?
> >> (although nothing has changed which will affect the KVM and this is 
> >> architecture
> >> agnostic patch-set)
> >>
> >> Many thanks!
> >>
> >> Best
> >> Salil.
> >>
> >>
> >>>   regards,
> >>>   Harsh
> >>>   On 6/7/24 17:26, Salil Mehta wrote:
> >>>   > KVM vCPU creation is done once during the vCPU realization when Qemu
> >>>   > vCPU thread is spawned. This is common to all the architectures 
> >>> as of now.
> >>>   >
> >>>   > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
> >>>   > but the corresponding KVM vCPU object in the Host KVM is not 
> >>> destroyed
> >>>   > as KVM doesn't support vCPU removal. Therefore, its 
> >>> representative KVM
> >>>   > vCPU object/context in Qemu is parked.
> >>>   >
> >>>   > Refactor architecture common logic so that some APIs could be reused
> >>>   > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
> >>>   > Update new/old APIs with trace events. No functional change is 
> >>> intended
> >>>   here.
> >>>   >
> >>>   > Signed-off-by: Salil Mehta 
> >>>   > Reviewed-by: Gavin Shan 
> >>>   > Tested-by: Vishnu Pajjuri 
> >>>   > Reviewed-by: Jonathan Cameron 
> >>>   > Tested-by: Xianglai Li 
> >>>   > Tested-by: Miguel Luis 
> >>>   > Reviewed-by: Shaoqin Huang 
> >>>   > Reviewed-by: Vishnu Pajjuri 
> >>>   > Reviewed-by: Nicholas Piggin 
> >>>   > Tested-by: Zhao Liu 
> >>>   > Reviewed-by: Zhao Liu 
> >>>   > Reviewed-by: Harsh Prateek Bora 
> >>>   > ---
> >>>   >   accel/kvm/kvm-all.c    | 95 
> >>> 
> >>>   --
> >>>   >   accel/kvm/kvm-cpus.h   |  1 -
> >>>   >   accel/kvm/trace-events |  5 ++-
> >>>   >   include/sysemu/kvm.h   | 25 +++
> >>>   >   4 files changed, 92 insertions(+), 34 deletions(-)
> >>>   >
> >>>   > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
> >>>   > c0be9f5eed..8f9128bb92 100644
> >>>   > --- a/accel/kvm/kvm-all.c
> >>>   > +++ b/accel/kvm/kvm-all.c
> >>>   > @@ -340,14 +340,71 @@ err:
> >>>   >   return ret;
> >>>   >   }
> >>>   >
> >>>   > +void kvm_park_vcpu(CPUState *cpu)
> >>>   > +{
> >>>   > +    struct KVMParkedVcpu *vcpu;
> >>>   > +
> >>>   > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
> >>>   > +
> >>>   > +    vcpu = g_malloc0(sizeof(*vcpu));
> >>>   > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> >>>   > +    vcpu->kvm_fd = cpu->kvm_fd;
> >>>   > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); }
> >>>   > +
> >>>   > +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) {
> >>>   > +    struct KVMParkedVcpu *cpu;
> >>>   > +    int kvm_fd = -ENOENT;
> >>>   > +
> >>>   > +    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
> >>>   > +    if (cpu->vcpu_id == vcpu_id) {
> >>>   > +    QLIST_REMOVE(cpu, node);
> >>>   > +    kvm_fd = cpu->kvm_fd;
> >>>   > +    g_free(cpu);
> >>>   > +    }
> >>>   > +    }
> >>>   > +
> >>>   > +    trace_kvm_unpark_vcpu(vcpu_id, kvm_fd > 0 ? "unparked" : "not
> >>>   > + found parked");
> >>>   > +
> >>>   > +    return kvm_fd;
> >>>   > +}
> >>>   > +
> >>>   > +int kvm_create_vcpu(CPUState *cpu)
> >>>   > +{
> >>>   > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
> >>>   > +    KVMState *s = kvm_state;
> >>>   > +    int kvm_fd;
> >>>   > +
> >>>   > +    /* check if the KVM vCPU already exist but is parked */
> >>>   > +    kvm_fd = kvm_unpark_vcpu(s, vcpu_id);
> >>>   > +

Re: [PATCH v2 1/7] target/ppc: use locally stored msr and avoid indirect access

2024-07-04 Thread Nicholas Piggin

On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> hreg_compute_hflags_value already stores msr locally to be used in most
> of the logic in the routine however some instances are still using
> env->msr which is unnecessary. Use locally stored value as available.
>
> Signed-off-by: Harsh Prateek Bora 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/helper_regs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 25258986e3..945fa1a596 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -106,10 +106,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState 
> *env)
>  
>  if (ppc_flags & POWERPC_FLAG_DE) {
>  target_ulong dbcr0 = env->spr[SPR_BOOKE_DBCR0];
> -if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_SE;
>  }
> -if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_BE;
>  }
>  } else {

Re: [PATCH v2 2/7] target/ppc: optimize hreg_compute_pmu_hflags_value

2024-07-04 Thread Nicholas Piggin

On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Cache env->spr[SPR_POWER_MMCR0] in a local variable as used in multiple
> conditions to avoid multiple indirect accesses.
>
> Signed-off-by: Harsh Prateek Bora 

Compiler might cache it in a reg, but anyway I like it.

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/helper_regs.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 945fa1a596..d09dcacd5e 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -50,15 +50,16 @@ void hreg_swap_gpr_tgpr(CPUPPCState *env)
>  static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState *env)
>  {
>  uint32_t hflags = 0;
> -
>  #if defined(TARGET_PPC64)
> -if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC0) {
> +target_ulong mmcr0 = env->spr[SPR_POWER_MMCR0];
> +
> +if (mmcr0 & MMCR0_PMCC0) {
>  hflags |= 1 << HFLAGS_PMCC0;
>  }
> -if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
> +if (mmcr0 & MMCR0_PMCC1) {
>  hflags |= 1 << HFLAGS_PMCC1;
>  }
> -if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE) {
> +if (mmcr0 & MMCR0_PMCjCE) {
>  hflags |= 1 << HFLAGS_PMCJCE;
>  }
>

Re: [PATCH v2 3/7] target/ppc: optimize hreg_compute_pmu_hflags_value

2024-07-04 Thread Nicholas Piggin

On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> The second if-condition can be true only if the first one above is true.
> Enclose the latter into the former to avoid un-necessary check if first
> condition fails.
>
> Signed-off-by: Harsh Prateek Bora 
> Reviewed-by: BALATON Zoltan 

Ditto for this it's possible compiler can transform it, but I
like the code.

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/helper_regs.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index d09dcacd5e..261a8ba79f 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -66,9 +66,9 @@ static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState 
> *env)
>  #ifndef CONFIG_USER_ONLY
>  if (env->pmc_ins_cnt) {
>  hflags |= 1 << HFLAGS_INSN_CNT;
> -}
> -if (env->pmc_ins_cnt & 0x1e) {
> -hflags |= 1 << HFLAGS_PMC_OTHER;
> +if (env->pmc_ins_cnt & 0x1e) {
> +hflags |= 1 << HFLAGS_PMC_OTHER;
> +}
>  }
>  #endif
>  #endif

Re: [PATCH v3] virtio: Implement Virtio Backend for SD/MMC in QEMU

2024-07-04 Thread Michael S. Tsirkin

On Thu, Jul 04, 2024 at 10:25:53AM +0300, Mikhail Krasheninnikov wrote:
> 
> On Wed, 3 Jul 2024, Michael S. Tsirkin wrote:
> 
> > On Wed, Jul 03, 2024 at 10:55:17PM +0300, Mikhail Krasheninnikov wrote:
> > > 
> > > Hello, Alex!
> > > 
> > > No, there's no patch to the VirtIO specification yet. This is 
> > > proof-of-concept solution since I'm not sure that I did everything 
> > > correct with the design (and as folks' reviews show, for a good reason). 
> > > As soon as most obvious issues would be out of the way, I think I'll 
> > > submit a patch.
> > 
> > 
> > Mikhail, if you want people to review your patches but not merge
> > them yet, pls use an RFC tag in the subject to avoid confusion.
> > 
> > Thanks,
> > 
> > -- 
> > MST
> > 
> > 
> 
> Hello, Michael!
> 
> I was planning to submit three patches: to the kernel, emulator and Virtio 
> specification around the same time - as soon as the obvious bugs are 
> fixed, I'll submit a patch to the specification. I thought it wasn't 
> necessary to use the RFC tag in that case, but if you think it is, 
> I'll include it with the next version of the patch.

RFC means "this is proof of concept". On the one hand some people
won't bother reviewing then. On the other your patch will be
judged less harshly. If your code still has debugging printks,
it's clearly an RFC at best.

-- 
MST

Re: [PATCH v2 4/7] target/ppc: optimize p9 exception handling routines

2024-07-04 Thread Nicholas Piggin

On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Currently, p9 exception handling has multiple if-condition checks where
> it does an indirect access to pending_interrupts via env. Pass the
> value during entry to avoid multiple indirect accesses.

Does code change? I don't mind, would like all CPU funtions done
the same way if we're going to do this though.

Thanks,
Nick

>
> Signed-off-by: Harsh Prateek Bora  
> ---
>  target/ppc/excp_helper.c | 47 +---
>  1 file changed, 25 insertions(+), 22 deletions(-)
>
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 0712098cf7..704eddac63 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -1842,10 +1842,12 @@ static int p8_next_unmasked_interrupt(CPUPPCState 
> *env)
>   PPC_INTERRUPT_WDT | PPC_INTERRUPT_CDOORBELL | PPC_INTERRUPT_FIT |  \
>   PPC_INTERRUPT_PIT | PPC_INTERRUPT_THERM)
>  
> -static int p9_interrupt_powersave(CPUPPCState *env)
> +static int p9_interrupt_powersave(CPUPPCState *env,
> +  uint32_t pending_interrupts)
>  {
> +
>  /* External Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_EXT) &&
> +if ((pending_interrupts & PPC_INTERRUPT_EXT) &&
>  (env->spr[SPR_LPCR] & LPCR_EEE)) {
>  bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC);
>  if (!heic || !FIELD_EX64_HV(env->msr) ||
> @@ -1854,48 +1856,49 @@ static int p9_interrupt_powersave(CPUPPCState *env)
>  }
>  }
>  /* Decrementer Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_DECR) &&
> +if ((pending_interrupts & PPC_INTERRUPT_DECR) &&
>  (env->spr[SPR_LPCR] & LPCR_DEE)) {
>  return PPC_INTERRUPT_DECR;
>  }
>  /* Machine Check or Hypervisor Maintenance Exception */
>  if (env->spr[SPR_LPCR] & LPCR_OEE) {
> -if (env->pending_interrupts & PPC_INTERRUPT_MCK) {
> +if (pending_interrupts & PPC_INTERRUPT_MCK) {
>  return PPC_INTERRUPT_MCK;
>  }
> -if (env->pending_interrupts & PPC_INTERRUPT_HMI) {
> +if (pending_interrupts & PPC_INTERRUPT_HMI) {
>  return PPC_INTERRUPT_HMI;
>  }
>  }
>  /* Privileged Doorbell Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_DOORBELL) &&
> +if ((pending_interrupts & PPC_INTERRUPT_DOORBELL) &&
>  (env->spr[SPR_LPCR] & LPCR_PDEE)) {
>  return PPC_INTERRUPT_DOORBELL;
>  }
>  /* Hypervisor Doorbell Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) &&
> +if ((pending_interrupts & PPC_INTERRUPT_HDOORBELL) &&
>  (env->spr[SPR_LPCR] & LPCR_HDEE)) {
>  return PPC_INTERRUPT_HDOORBELL;
>  }
>  /* Hypervisor virtualization exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_HVIRT) &&
> +if ((pending_interrupts & PPC_INTERRUPT_HVIRT) &&
>  (env->spr[SPR_LPCR] & LPCR_HVEE)) {
>  return PPC_INTERRUPT_HVIRT;
>  }
> -if (env->pending_interrupts & PPC_INTERRUPT_RESET) {
> +if (pending_interrupts & PPC_INTERRUPT_RESET) {
>  return PPC_INTERRUPT_RESET;
>  }
>  return 0;
>  }
>  
> -static int p9_next_unmasked_interrupt(CPUPPCState *env)
> +static int p9_next_unmasked_interrupt(CPUPPCState *env,
> +  uint32_t pending_interrupts)
>  {
>  CPUState *cs = env_cpu(env);
>  
>  /* Ignore MSR[EE] when coming out of some power management states */
>  bool msr_ee = FIELD_EX64(env->msr, MSR, EE) || env->resume_as_sreset;
>  
> -assert((env->pending_interrupts & P9_UNUSED_INTERRUPTS) == 0);
> +assert((pending_interrupts & P9_UNUSED_INTERRUPTS) == 0);
>  
>  if (cs->halted) {
>  if (env->spr[SPR_PSSCR] & PSSCR_EC) {
> @@ -1903,7 +1906,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env)
>   * When PSSCR[EC] is set, LPCR[PECE] controls which interrupts 
> can
>   * wakeup the processor
>   */
> -return p9_interrupt_powersave(env);
> +return p9_interrupt_powersave(env, pending_interrupts);
>  } else {
>  /*
>   * When it's clear, any system-caused exception exits 
> power-saving
> @@ -1914,12 +1917,12 @@ static int p9_next_unmasked_interrupt(CPUPPCState 
> *env)
>  }
>  
>  /* Machine check exception */
> -if (env->pending_interrupts & PPC_INTERRUPT_MCK) {
> +if (pending_interrupts & PPC_INTERRUPT_MCK) {
>  return PPC_INTERRUPT_MCK;
>  }
>  
>  /* Hypervisor decrementer exception */
> -if (env->pending_interrupts & PPC_INTERRUPT_HDECR) {
> +if (pending_interrupts & PPC_INTERRUPT_HDECR) {
>  /* LPCR will be clear when not supported so this will work */
>  bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE);
>  if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hdice) {
> @@ -1929,7 +1932,7 @@ static int p9

Re: [PATCH v2 1/7] target/ppc: use locally stored msr and avoid indirect access

2024-07-04 Thread Nicholas Piggin

On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> hreg_compute_hflags_value already stores msr locally to be used in most
> of the logic in the routine however some instances are still using
> env->msr which is unnecessary. Use locally stored value as available.

BTW hreg_store_msr uses env->msr a bunch of times. Would a local
variable improve that too?

Thanks,
Nick

>
> Signed-off-by: Harsh Prateek Bora 
> ---
>  target/ppc/helper_regs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 25258986e3..945fa1a596 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -106,10 +106,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState 
> *env)
>  
>  if (ppc_flags & POWERPC_FLAG_DE) {
>  target_ulong dbcr0 = env->spr[SPR_BOOKE_DBCR0];
> -if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_SE;
>  }
> -if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_BE;
>  }
>  } else {

Re: [PATCH v2 5/7] target/ppc: optimize p9 exception handling routines for lpcr

2024-07-04 Thread Nicholas Piggin

On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Like pending_interrupts, env->spr[SPR_LPCR] is being used at multiple
> places across p9 exception handlers. Pass the value during entry and
> avoid multiple indirect accesses.

Ditto for this (does it help code, other CPU functions should
be converted similarly).

Thanks,
Nick

>
> Signed-off-by: Harsh Prateek Bora 
> ---
>  target/ppc/excp_helper.c | 33 ++---
>  1 file changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 704eddac63..d3db81e6ae 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -1843,13 +1843,14 @@ static int p8_next_unmasked_interrupt(CPUPPCState 
> *env)
>   PPC_INTERRUPT_PIT | PPC_INTERRUPT_THERM)
>  
>  static int p9_interrupt_powersave(CPUPPCState *env,
> -  uint32_t pending_interrupts)
> +  uint32_t pending_interrupts,
> +  target_ulong lpcr)
>  {
>  
>  /* External Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_EXT) &&
> -(env->spr[SPR_LPCR] & LPCR_EEE)) {
> -bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC);
> +(lpcr & LPCR_EEE)) {
> +bool heic = !!(lpcr & LPCR_HEIC);
>  if (!heic || !FIELD_EX64_HV(env->msr) ||
>  FIELD_EX64(env->msr, MSR, PR)) {
>  return PPC_INTERRUPT_EXT;
> @@ -1857,11 +1858,11 @@ static int p9_interrupt_powersave(CPUPPCState *env,
>  }
>  /* Decrementer Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_DECR) &&
> -(env->spr[SPR_LPCR] & LPCR_DEE)) {
> +(lpcr & LPCR_DEE)) {
>  return PPC_INTERRUPT_DECR;
>  }
>  /* Machine Check or Hypervisor Maintenance Exception */
> -if (env->spr[SPR_LPCR] & LPCR_OEE) {
> +if (lpcr & LPCR_OEE) {
>  if (pending_interrupts & PPC_INTERRUPT_MCK) {
>  return PPC_INTERRUPT_MCK;
>  }
> @@ -1871,17 +1872,17 @@ static int p9_interrupt_powersave(CPUPPCState *env,
>  }
>  /* Privileged Doorbell Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_DOORBELL) &&
> -(env->spr[SPR_LPCR] & LPCR_PDEE)) {
> +(lpcr & LPCR_PDEE)) {
>  return PPC_INTERRUPT_DOORBELL;
>  }
>  /* Hypervisor Doorbell Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_HDOORBELL) &&
> -(env->spr[SPR_LPCR] & LPCR_HDEE)) {
> +(lpcr & LPCR_HDEE)) {
>  return PPC_INTERRUPT_HDOORBELL;
>  }
>  /* Hypervisor virtualization exception */
>  if ((pending_interrupts & PPC_INTERRUPT_HVIRT) &&
> -(env->spr[SPR_LPCR] & LPCR_HVEE)) {
> +(lpcr & LPCR_HVEE)) {
>  return PPC_INTERRUPT_HVIRT;
>  }
>  if (pending_interrupts & PPC_INTERRUPT_RESET) {
> @@ -1891,7 +1892,8 @@ static int p9_interrupt_powersave(CPUPPCState *env,
>  }
>  
>  static int p9_next_unmasked_interrupt(CPUPPCState *env,
> -  uint32_t pending_interrupts)
> +  uint32_t pending_interrupts,
> +  target_ulong lpcr)
>  {
>  CPUState *cs = env_cpu(env);
>  
> @@ -1906,7 +1908,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>   * When PSSCR[EC] is set, LPCR[PECE] controls which interrupts 
> can
>   * wakeup the processor
>   */
> -return p9_interrupt_powersave(env, pending_interrupts);
> +return p9_interrupt_powersave(env, pending_interrupts, lpcr);
>  } else {
>  /*
>   * When it's clear, any system-caused exception exits 
> power-saving
> @@ -1924,7 +1926,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>  /* Hypervisor decrementer exception */
>  if (pending_interrupts & PPC_INTERRUPT_HDECR) {
>  /* LPCR will be clear when not supported so this will work */
> -bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE);
> +bool hdice = !!(lpcr & LPCR_HDICE);
>  if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hdice) {
>  /* HDEC clears on delivery */
>  return PPC_INTERRUPT_HDECR;
> @@ -1934,7 +1936,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>  /* Hypervisor virtualization interrupt */
>  if (pending_interrupts & PPC_INTERRUPT_HVIRT) {
>  /* LPCR will be clear when not supported so this will work */
> -bool hvice = !!(env->spr[SPR_LPCR] & LPCR_HVICE);
> +bool hvice = !!(lpcr & LPCR_HVICE);
>  if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hvice) {
>  return PPC_INTERRUPT_HVIRT;
>  }
> @@ -1942,8 +1944,8 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>  
>  /* External interrupt can ignore MSR:EE under some circumstances */
>  if (pending_interrupts & PPC_INTERRUPT_EXT) {
> -bool lpes0 = !

Re: [PATCH] net: update netdev stream/dgram man page

2024-07-04 Thread Laurent Vivier


Hi Marc-André,

thank you for your comments, you're right I should not mix TCP/UDP and unix socket.  I'm 
going to fix that.


Thanks,
Laurent

On 02/07/2024 09:39, Marc-André Lureau wrote:

Hi

On Wed, Jun 26, 2024 at 7:53 PM Laurent Vivier > wrote:


Add the description of "-netdev stream" and "-netdev dgram" in the QEMU
manpage.

Add some examples on how to use them, including a way to use
"-netdev stream" and "passt" in place of "-netdev user".
("passt" is a non privileged translation proxy between layer-2,
like "-netdev stream", and layer-4 on host, like TCP, UDP,
ICMP/ICMPv6 echo)

Fixes: 5166fe0ae46d ("qapi: net: add stream and dgram netdevs")
Fixes: 13c6be96618c ("net: stream: add unix socket")
Fixes: 784e7a253104 ("net: dgram: add unix socket")
Fixes: 148fbf0d58a6 ("net: stream: add a new option to automatically 
reconnect"
Signed-off-by: Laurent Vivier mailto:lviv...@redhat.com>>


Could be easier to review if this documentation addition is splitted in various 
patches.

---
  qemu-options.hx | 189 
  1 file changed, 189 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 8ca7f34ef0c8..b8a1a65f05e7 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3353,6 +3353,195 @@ SRST
                           -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\
                           -netdev socket,id=n1,mcast=239.192.168.1:1102
,localaddr=1.2.3.4

+``-netdev

stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]``
+    Configure a network backend to connect to another QEMU virtual machine 
or a proxy
using a TCP/IP socket.
+
+    ``server=on|off``
+        if ``on`` create a server socket
+
+    ``addr.host=host,addr.port=port``
+        socket address to listen on (server=on) or connect to (server=off)
+
+    ``to=maxport``
+        if present, this is range of possible addresses, with port between 
``port``
and ``maxport``.
+
+    ``numeric=on|off``
+        if ``on`` ``host`` and ``port`` are guaranteed to be numeric, 
otherwise a
name resolution should be attempted (default: ``off``)
+
+    ``keep-alive=on|off``
+        enable keep-alive when connecting to this socket.  Not supported 
for passive
sockets.
+
+    ``mptcp=on|off``
+        enable multipath TCP
+
+    ``ipv4=on|off``
+        whether to accept IPv4 addresses, default to try both IPv4 and IPv6
+
+    ``ipv6=on|off``
+        whether to accept IPv6 addresses, default to try both IPv4 and IPv6
+
+    ``reconnect=seconds``
+        for a client socket, if a socket is disconnected, then attempt a 
reconnect
after the given number of seconds.
+        Setting this to zero disables this function.  (default: 0)
+
+    Example (two guests connected using a TCP/IP socket):
+
+    .. parsed-literal::
+
+        # first VM
+        |qemu_system| linux.img \\
+                      -device virtio-net,netdev=net0,mac=52:54:00:12:34:56 
\\
+                      -netdev
stream,id=net0,server=on,addr.type=inet,addr.host=localhost,addr.port=1234
+        # second VM
+        |qemu_system| linux.img \\
+                      -device virtio-net,netdev=net0,mac=52:54:00:12:34:57 
\\
+                      -netdev

stream,id=net0,server=off,addr.type=inet,addr.host=localhost,addr.port=1234,reconnect=5
+
+``-netdev

stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]``
+    Configure a network backend to connect to another QEMU virtual machine 
or a proxy
using a TCP/UNIX socket.


  "TCP/UNIX": just UNIX instead?

+
+    ``server=on|off``
+        if ``on`` create a server socket
+
+    ``addr.path=path``
+        filesystem path to use
+
+    ``abstract=on|off``
+        if ``on``, this is a Linux abstract socket address.
+
+    ``tight=on|off``
+        if false, pad an abstract socket address with enough null bytes to 
make it
fill struct sockaddr_un member sun_path.
+
+    ``reconnect=seconds``
+        for a client socket, if a socket is disconnected, then attempt a 
reconnect
after the given number of seconds.
+        Setting this to zero disables this function.  (default: 0)
+
+    Example (using passt as a replacement of -netdev user):
+
+    .. parsed-literal::
+
+        # start passt server as a non privileged user
+        passt
+        UNIX domain socket bound at /tmp/passt_1.socket
+        # start QEMU

Re: [PATCH v2 7/7] target/ppc: redue code duplication across Power9/10 init code

2024-07-04 Thread Nicholas Piggin

On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Power9/10 initialization code consists of a lot of logical OR of
> various flag bits as supported by respective Power platform during its
> initialization, most of which is duplicated and only selected bits are
> added or removed as needed with each new platform support being added.
> Remove the duplicate code and share using common macros.

This an the previous patch are fiddly to verify but good cleanups I
think. Couple of small things.

>
> Signed-off-by: Harsh Prateek Bora 
> ---
>  target/ppc/cpu_init.h |  77 ++
>  target/ppc/cpu_init.c | 123 ++
>  2 files changed, 92 insertions(+), 108 deletions(-)
>  create mode 100644 target/ppc/cpu_init.h
>
> diff --git a/target/ppc/cpu_init.h b/target/ppc/cpu_init.h
> new file mode 100644
> index 00..53909987b0
> --- /dev/null
> +++ b/target/ppc/cpu_init.h
> @@ -0,0 +1,77 @@

This should have a SPDX license tag I guess.

I suppose doing a new header for it is the way to go. That cpu_init.c
file is enormous...

> +#ifndef TARGET_PPC_CPU_INIT_H
> +#define TARGET_PPC_CPU_INIT_H
> +
> +#define POWERPC_FAMILY_POWER9_INSNS_FLAGS   \
> +PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | \
> +PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |   \
> +PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | PPC_FLOAT_FRSQRTES |  \
> +PPC_FLOAT_STFIWX | PPC_FLOAT_EXT |PPC_CACHE | PPC_CACHE_ICBI |  \
> +PPC_CACHE_DCBZ | PPC_MEM_SYNC | PPC_MEM_EIEIO | PPC_MEM_TLBIE | \
> +PPC_MEM_TLBSYNC | PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |  \
> +PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD |   \
> +PPC_CILDST
> +
> +#define POWERPC_FAMILY_POWER9_INSNS_FLAGS2_COMMON   \
> +PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX | \
> +PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 |  \
> +PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |   \
> +PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 |  \
> +PPC2_ISA207S | PPC2_FP_CVT_S64 | PPC2_ISA300 | PPC2_PRCNTL |\
> +PPC2_MEM_LWSYNC | PPC2_BCDA_ISA206
> +
> +#define POWERPC_FAMILY_POWER9_INSNS_FLAGS2  \
> +POWERPC_FAMILY_POWER9_INSNS_FLAGS2_COMMON | PPC2_TM
> +#define POWERPC_FAMILY_POWER10_INSNS_FLAGS2 \
> +POWERPC_FAMILY_POWER9_INSNS_FLAGS2_COMMON | PPC2_ISA310
> +
> +#define POWERPC_POWER9_COMMON_PCC_MSR_MASK \
> +(1ull << MSR_SF) | \
> +(1ull << MSR_HV) | \
> +(1ull << MSR_VR) | \
> +(1ull << MSR_VSX) |\
> +(1ull << MSR_EE) | \
> +(1ull << MSR_PR) | \
> +(1ull << MSR_FP) | \
> +(1ull << MSR_ME) | \
> +(1ull << MSR_FE0) |\
> +(1ull << MSR_SE) | \
> +(1ull << MSR_DE) | \
> +(1ull << MSR_FE1) |\
> +(1ull << MSR_IR) | \
> +(1ull << MSR_DR) | \
> +(1ull << MSR_PMM) |\
> +(1ull << MSR_RI) | \
> +(1ull << MSR_LE)
> +
> +#define POWERPC_POWER9_PCC_MSR_MASK \
> +POWERPC_POWER9_COMMON_PCC_MSR_MASK | (1ull << MSR_TM)
> +#define POWERPC_POWER10_PCC_MSR_MASK \
> +POWERPC_POWER9_COMMON_PCC_MSR_MASK
> +#define POWERPC_POWER9_PCC_PCR_MASK \
> +PCR_COMPAT_2_05 | PCR_COMPAT_2_06 | PCR_COMPAT_2_07
> +#define POWERPC_POWER10_PCC_PCR_MASK \
> +POWERPC_POWER9_PCC_PCR_MASK | PCR_COMPAT_3_00
> +#define POWERPC_POWER9_PCC_PCR_SUPPORTED \
> +PCR_COMPAT_3_00 | PCR_COMPAT_2_07 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05
> +#define POWERPC_POWER10_PCC_PCR_SUPPORTED \
> +POWERPC_POWER9_PCC_PCR_SUPPORTED | PCR_COMPAT_3_10
> +#define POWERPC_POWER9_PCC_LPCR_MASK\
> +LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD |   \
> +(LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE | LPCR_AIL |  \
> +LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR | LPCR_LD | \
> +(LPCR_PECE_L_MASK & (LPCR_PDEE|LPCR_HDEE|LPCR_EEE|LPCR_DEE|LPCR_OEE)) | \
> +LPCR_MER | LPCR_GTSE | LPCR_TC | LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE |  \
> +LPCR_HDICE
> +/* DD2 adds an extra HAIL bit */
> +#define POWERPC_POWER10_PCC_LPCR_MASK \
> +POWERPC_POWER9_PCC_LPCR_MASK | LPCR_HAIL
> +#define POWERPC_POWER9_PCC_FLAGS_COMMON \
> +POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE |  \
> +POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |   \
> +POWERPC_FLAG_VSX | POWERPC_FLAG_SCV
> +
> +#define POWERPC_POWER9_PCC_FLAGS  \
> +POWERPC_POWER9_PCC_FLAGS_COMMON | POWERPC_FLAG_TM
> +#

Re: [PATCH v3] memory tier: consolidate the initialization of memory tiers

2024-07-04 Thread Huang, Ying

"Ho-Ren (Jack) Chuang"  writes:

> The current memory tier initialization process is distributed across
> two different functions, memory_tier_init() and memory_tier_late_init().
> This design is hard to maintain. Thus, this patch is proposed to reduce
> the possible code paths by consolidating different
> initialization patches into one.
>
> The earlier discussion with Jonathan and Ying is listed here:
> https://lore.kernel.org/lkml/20240405150244.4...@huawei.com/
>
> If we want to put these two initializations together, they must be
> placed together in the later function. Because only at that time,
> the HMAT information will be ready, adist between nodes can be
> calculated, and memory tiering can be established based on the adist.
> So we position the initialization at memory_tier_init() to the
> memory_tier_late_init() call. Moreover, it's natural to keep
> memory_tier initialization in drivers at device_initcall() level.
>
> If we simply move the set_node_memory_tier() from memory_tier_init()
> to late_initcall(), it will result in HMAT not registering
> the mt_adistance_algorithm callback function, because
> set_node_memory_tier() is not performed during the memory tiering
> initialization phase, leading to a lack of correct default_dram
> information.
>
> Therefore, we introduced a nodemask to pass the information of the
> default DRAM nodes. The reason for not choosing to reuse
> default_dram_type->nodes is that it is not clean enough. So in the end,
> we use a __initdata variable, which is a variable that is released once
> initialization is complete, including both CPU and memory nodes for HMAT
> to iterate through.
>
> This patchset is based on commits  ("memory tier: create
> CPUless memory tiers after obtaining HMAT info") and
>  ("memory tier: dax/kmem: introduce an abstract layer for
> finding, allocating, and putting memory types"):
> [0/2] 
> https://lkml.kernel.org/r/20240405000707.2670063-1-horenchu...@bytedance.com
> [1/2] 
> https://lkml.kernel.org/r/20240405000707.2670063-2-horenchu...@bytedance.com
> [1/2] 
> https://lkml.kernel.org/r/20240405000707.2670063-3-horenchu...@bytedance.com
>
> Signed-off-by: Ho-Ren (Jack) Chuang 
> Suggested-by: Jonathan Cameron 

LGTM, Thanks!

Reviewed-by: "Huang, Ying" 

> ---
> - v3:
>  * Combine the cover letter and the patch.
>  * Remove extern default_dram_type
>  * Remove __initdata in a extern variable
>  * Roll back coding style, fix comment description
> - v2:
>  Thanks to Huang, Ying's and Andrew's comments
>  * Add a cover letter
>  * Add Suggested-by: Jonathan Cameron 
>  * Add get/put_online_mems() protection in memory_tier_late_init()
>  * If memtype is set, skip initializing its node
>  * Remove redundant code/comments or rewrite code in a cleaner manner
>  * https://lore.kernel.org/all/20240628060925.303309-1-horen.chu...@linux.dev/
> - v1:
>  * 
> https://lore.kernel.org/all/20240621044833.3953055-1-horen.chu...@linux.dev/
>
> - Open to discussion:
>  * Should we use `continue` or `break` if set_node_memory_tier() fails in
>memory_tier_late_init()? I've changed it to `continue` in v3.
>  * Compute the default_dram_nodes mask in hmat when needed? Currently,
>it still remains in memory-tiers.c
>  * Instead of using guard(mutex)(), use mutex_lock() as it was to make a
>cleaner diff?
>
> Thanks,
> Ho-Ren (Jack) Chuang
>
>  drivers/acpi/numa/hmat.c |  5 +---
>  include/linux/memory-tiers.h |  2 ++
>  mm/memory-tiers.c| 54 ++--
>  3 files changed, 24 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
> index 2c8ccc91ebe6..a2f9e7a4b479 100644
> --- a/drivers/acpi/numa/hmat.c
> +++ b/drivers/acpi/numa/hmat.c
> @@ -940,10 +940,7 @@ static int hmat_set_default_dram_perf(void)
>   struct memory_target *target;
>   struct access_coordinate *attrs;
>  
> - if (!default_dram_type)
> - return -EIO;
> -
> - for_each_node_mask(nid, default_dram_type->nodes) {
> + for_each_node_mask(nid, default_dram_nodes) {
>   pxm = node_to_pxm(nid);
>   target = find_mem_target(pxm);
>   if (!target)
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 0d70788558f4..0dc0cf2863e2 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -38,6 +38,7 @@ struct access_coordinate;
>  #ifdef CONFIG_NUMA
>  extern bool numa_demotion_enabled;
>  extern struct memory_dev_type *default_dram_type;
> +extern nodemask_t default_dram_nodes;
>  struct memory_dev_type *alloc_memory_type(int adistance);
>  void put_memory_type(struct memory_dev_type *memtype);
>  void init_node_memory_type(int node, struct memory_dev_type *default_type);
> @@ -76,6 +77,7 @@ static inline bool node_is_toptier(int node)
>  
>  #define numa_demotion_enabledfalse
>  #define default_dram_typeNULL
> +#define default_dram_nodes   NODE_MASK_NONE
>  /*
>   * CO

Re: [PULL v3 58/85] hw/i386/fw_cfg: Add etc/e820 to fw_cfg late

2024-07-04 Thread David Woodhouse

On Wed, 2024-07-03 at 18:48 -0400, Michael S. Tsirkin wrote:
> From: David Woodhouse 

Oops, that was supposed to be

From: David Woodhouse 

Not the end of the world if it's too late to change it. 


smime.p7s
Description: S/MIME cryptographic signature

Re: [PATCH ats_vtd v5 00/22] ATS support for VT-d

2024-07-04 Thread Yi Liu


On 2024/7/4 12:36, CLEMENT MATHIEU--DRIF wrote:


On 03/07/2024 14:32, Yi Liu wrote:

Caution: External email. Do not open attachments or click links,
unless this email comes from a known sender and you know the content
is safe.


Hi, thanks for your review! very efficient!


Hi CMD,

I've went through the series. Some general suggestions on the series.

1) Patch 01, 02, 04 can be sent separately as they are fixes.

Will do

2) This series mixed the ATS and PASID capability a bit. Actually,
they don't have dependency. I'd suggest you split the series into
   - support ATS for the requests without PASID
   - support ATS for requests with PASID
The second part should be an incremental change based on the first
part. If you can make use of the existing translate() callback, then
it is possible to remove the dependency on Zhenzhong's stage-1 series.

The final purpose is to support SVM, consequently, we only add support
for ATS with PASID here


yes. but no need to put all of them in one series. Just like you sent
the PRI series separately.


3) Some commits do not have commit message. It would be good to have
it.

Ok, I will be more verbose ;)

4) Some helpers look to be used by device model, if possible, it's better
to submit them with a demo device.

The demo device is already in my GitHub repo
(https://github.com/BullSequana/qemu/tree/master)
It will be sent in a future series that adds the last features required
for SVM (splitting the series to make reviews less painful)

5) A design description in the cover-letter would be helpful.

Ok, I will elaborate


On 2024/7/2 13:52, CLEMENT MATHIEU--DRIF wrote:

From: Clement Mathieu--Drif 

This series belongs to a list of series that add SVM support for VT-d.

As a starting point, we use the series called 'intel_iommu: Enable
stage-1 translation' (rfc2) by Zhenzhong Duan and Yi Liu.

Here we focus on the implementation of ATS support in the IOMMU and
on a PCI-level
API for ATS to be used by virtual devices.

This work is based on the VT-d specification version 4.1 (March 2023).
Here is a link to a GitHub repository where you can find the
following elements :
  - Qemu with all the patches for SVM
  - ATS
  - PRI
  - Device IOTLB invalidations
  - Requests with already translated addresses
  - A demo device
  - A simple driver for the demo device
  - A userspace program (for testing and demonstration purposes)

https://github.com/BullSequana/Qemu-in-guest-SVM-demo


v2
  - handle huge pages better by detecting the page table level at
which the translation errors occur
  - Changes after review by ZhenZhong Duan :
   - Set the access bit after checking permissions
   - helper for PASID and ATS : make the commit message more
accurate ('present' replaced with 'enabled')
   - pcie_pasid_init: add PCI_PASID_CAP_WIDTH_SHIFT and use it
instead of PCI_EXT_CAP_PASID_SIZEOF for shifting the pasid width when
preparing the capability register
   - pci: do not check pci_bus_bypass_iommu after calling
pci_device_get_iommu_bus_devfn
   - do not alter formatting of IOMMUTLBEntry declaration
   - vtd_iova_fl_check_canonical : directly use s->aw_bits instead
of aw for the sake of clarity

v3
  - rebase on new version of Zhenzhong's flts implementation
  - fix the atc lookup operation (check the mask before returning
an entry)
  - add a unit test for the ATC
  - store a user pointer in the iommu notifiers to simplify the
implementation of svm devices
  Changes after review by Zhenzhong :
   - store the input pasid instead of rid2pasid when returning an
entry after a translation
   - split the ATC implementation and its unit tests

v4
  Changes after internal review
   - Fix the nowrite optimization, an ATS translation without the
nowrite flag should not fail when the write permission is not set


It's strange to list internal review here.


v5
  Changes after review by Philippe :
   - change the type of 'level' to unsigned in vtd_lookup_iotlb


list change log from latest to the earliest would be nice too. Look
forward
to your next version. :)

Regards,
Yi Liu


Clément Mathieu--Drif (22):
intel_iommu: fix FRCD construction macro.
intel_iommu: make types match
intel_iommu: return page walk level even when the translation fails
intel_iommu: do not consider wait_desc as an invalid descriptor
memory: add permissions in IOMMUAccessFlags
pcie: add helper to declare PASID capability for a pcie device
pcie: helper functions to check if PASID and ATS are enabled
intel_iommu: declare supported PASID size
pci: cache the bus mastering status in the device
pci: add IOMMU operations to get address spaces and memory regions
  with PASID
memory: store user data pointer in the IOMMU notifiers
pci: add a pci-level initialization function for iommu notifiers
intel_iommu: implement the get_ad

[PATCH] MAINTAINERS: add Stefano Garzarella as vhost/vhost-user reviewer

2024-07-04 Thread Stefano Garzarella

I have recently been working on supporting vhost-user on any POSIX,
so I want to help maintain it.

Cc: Michael S. Tsirkin 
Signed-off-by: Stefano Garzarella 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6725913c8b..47493f19d9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2207,6 +2207,7 @@ F: docs/devel/vfio-iommufd.rst
 
 vhost
 M: Michael S. Tsirkin 
+R: Stefano Garzarella 
 S: Supported
 F: hw/*/*vhost*
 F: docs/interop/vhost-user.json
-- 
2.45.2

Re: [PATCH v3] virtio: Implement Virtio Backend for SD/MMC in QEMU

2024-07-04 Thread Ми



> 4 июля 2024 г., в 10:38, Michael S. Tsirkin  написал(а):
> 
> On Thu, Jul 04, 2024 at 10:25:53AM +0300, Mikhail Krasheninnikov wrote:
>> 
>> On Wed, 3 Jul 2024, Michael S. Tsirkin wrote:
>> 
>>> On Wed, Jul 03, 2024 at 10:55:17PM +0300, Mikhail Krasheninnikov wrote:
 
 Hello, Alex!
 
 No, there's no patch to the VirtIO specification yet. This is 
 proof-of-concept solution since I'm not sure that I did everything 
 correct with the design (and as folks' reviews show, for a good reason). 
 As soon as most obvious issues would be out of the way, I think I'll 
 submit a patch.
>>> 
>>> 
>>> Mikhail, if you want people to review your patches but not merge
>>> them yet, pls use an RFC tag in the subject to avoid confusion.
>>> 
>>> Thanks,
>>> 
>>> -- 
>>> MST
>>> 
>>> 
>> 
>> Hello, Michael!
>> 
>> I was planning to submit three patches: to the kernel, emulator and Virtio 
>> specification around the same time - as soon as the obvious bugs are 
>> fixed, I'll submit a patch to the specification. I thought it wasn't 
>> necessary to use the RFC tag in that case, but if you think it is, 
>> I'll include it with the next version of the patch.
> 
> RFC means "this is proof of concept". On the one hand some people
> won't bother reviewing then. On the other your patch will be
> judged less harshly. If your code still has debugging printks,
> it's clearly an RFC at best.
> 
> -- 
> MST
> 

I apologize for the debug printks in other patch, I’m not really sure
how I gazed over it.. If it makes my situation better, it’s my first series
of patches. I’ll make sure to triple-check next time. Thanks for the
feedback!

[PATCH v3 0/8] support AST2700 network

2024-07-04 Thread Jamin Lin via

change from v1:
- ftgmac100
 - fix coding style
 - support 64 bits dma dram address for AST2700

change from v2:
- ftgmac100: update memory region size to 0x200.
- ftgmac100: introduce a new class(ftgmac100_high),
class attribute and memop handlers, for FTGMAC100_*_HIGH regs read/write.
- aspeed_ast27x0: update network model to ftgmac100_high to support
  64 bits dram address DMA.
- m25p80: support quad mode for w25q01jvq

change from v3:
- ftgmac100: update memory region size to 64KB.
- ftgmac100: using a property to activate the region for new registers,
  instead of a class
- ftgmac100: introduce TX and RX ring base address high registers
- ftgmac100: split standalone patch for easy review
- ftgmac100: update TX and RX packet buffers address to 64 bits
- aspeed_ast27x0: set dma64 property for AST2700 ftgmac100
- machine_aspeed.py: update to test sdk v09.02 and network for AST2700

Jamin Lin (8):
  hw/net:ftgmac100: update memory region size to 64KB
  hw/net:ftgmac100: update ring base address to 64 bits
  hw/net:ftgmac100: introduce TX and RX ring base address high registers
to support 64 bits
  hw/net:ftgmac100: update TX and RX packet buffers address to 64 bits
  aspeed/soc: set dma64 property for AST2700 ftgmac100
  hw/block: m25p80: support quad mode for w25q01jvq
  machine_aspeed.py: update to test ASPEED OpenBMC SDK v09.02 for
AST2700
  machine_aspeed.py: update to test network for AST2700

 hw/arm/aspeed_ast27x0.c |   3 +
 hw/block/m25p80.c   |  16 
 hw/net/ftgmac100.c  | 147 +++-
 include/hw/net/ftgmac100.h  |  17 ++--
 tests/avocado/machine_aspeed.py |  12 +--
 5 files changed, 162 insertions(+), 33 deletions(-)

-- 
2.34.1

[PATCH v3 6/8] hw/block: m25p80: support quad mode for w25q01jvq

2024-07-04 Thread Jamin Lin via

According to the w25q01jv datasheet at page 16,
it is required to set QE bit in "Status Register 2".
Besides, users are able to utilize "Write Status Register 1(0x01)"
command to set QE bit in "Status Register 2" and
utilize "Read Status Register 2(0x35)" command to get the QE bit status.

To support quad mode for w25q01jvq, update collecting data needed
2 bytes for WRSR command in decode_new_cmd function and
verify QE bit at the second byte of collecting data bit 2
in complete_collecting_data.

Update RDCR_EQIO command to set bit 2 of return data
if quad mode enable in decode_new_cmd.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
Reviewed-by: Cédric Le Goater 
---
 hw/block/m25p80.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 8dec134832..9e99107b42 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -416,6 +416,7 @@ typedef enum {
 /*
  * Micron: 0x35 - enable QPI
  * Spansion: 0x35 - read control register
+ * Winbond: 0x35 - quad enable
  */
 RDCR_EQIO = 0x35,
 RSTQIO = 0xf5,
@@ -798,6 +799,11 @@ static void complete_collecting_data(Flash *s)
 s->four_bytes_address_mode = extract32(s->data[1], 5, 1);
 }
 break;
+case MAN_WINBOND:
+if (s->len > 1) {
+s->quad_enable = !!(s->data[1] & 0x02);
+}
+break;
 default:
 break;
 }
@@ -1254,6 +1260,10 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 s->needed_bytes = 2;
 s->state = STATE_COLLECTING_VAR_LEN_DATA;
 break;
+case MAN_WINBOND:
+s->needed_bytes = 2;
+s->state = STATE_COLLECTING_VAR_LEN_DATA;
+break;
 default:
 s->needed_bytes = 1;
 s->state = STATE_COLLECTING_DATA;
@@ -1431,6 +1441,12 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 case MAN_MACRONIX:
 s->quad_enable = true;
 break;
+case MAN_WINBOND:
+s->data[0] = (!!s->quad_enable) << 1;
+s->pos = 0;
+s->len = 1;
+s->state = STATE_READING_DATA;
+break;
 default:
 break;
 }
-- 
2.34.1

[PATCH v3 3/8] hw/net:ftgmac100: introduce TX and RX ring base address high registers to support 64 bits

2024-07-04 Thread Jamin Lin via

ASPEED AST2700 SOC is a 64 bits quad core CPUs (Cortex-a35)
And the base address of dram is "0x4 " which
is 64bits address.

It have "Normal Priority Transmit Ring Base Address Register High(0x17C)",
"High Priority Transmit Ring Base Address Register High(0x184)" and
"Receive Ring Base Address Register High(0x18C)" to save the high part physical
address of descriptor manager.
Ex: TX descriptor manager address [34:0]
The "Normal Priority Transmit Ring Base Address Register High(0x17C)"
bits [2:0] which corresponds the bits [34:32] of the 64 bits address of
the TX ring buffer address.
The "Normal Priority Transmit Ring Base Address Register(0x20)" bits [31:0]
which corresponds the bits [31:0] of the 64 bits address
of the TX ring buffer address.

Introduce a new sub region which size is 0x100 for the set of new registers
and map it at 0x100 in the container region.
This sub region range is from 0x100 to 0x1ff.

Introduce a new property and object attribute to activate the region for new 
registers.
Introduce a new memop handlers for the new register read and write.

Signed-off-by: Jamin Lin 
---
 hw/net/ftgmac100.c | 82 ++
 include/hw/net/ftgmac100.h |  4 ++
 2 files changed, 86 insertions(+)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index d026242e2b..68956aeb94 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -56,6 +56,16 @@
 #define FTGMAC100_PHYDATA 0x64
 #define FTGMAC100_FCR 0x68
 
+/*
+ * FTGMAC100 registers high
+ *
+ * values below are offset by - FTGMAC100_REG_HIGH_OFFSET from datasheet
+ * because its memory region is start at FTGMAC100_REG_HIGH_OFFSET
+ */
+#define FTGMAC100_NPTXR_BADR_HIGH   (0x17C - FTGMAC100_REG_HIGH_OFFSET)
+#define FTGMAC100_HPTXR_BADR_HIGH   (0x184 - FTGMAC100_REG_HIGH_OFFSET)
+#define FTGMAC100_RXR_BADR_HIGH (0x18C - FTGMAC100_REG_HIGH_OFFSET)
+
 /*
  * Interrupt status register & interrupt enable register
  */
@@ -913,6 +923,60 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
 ftgmac100_update_irq(s);
 }
 
+static uint64_t ftgmac100_high_read(void *opaque, hwaddr addr, unsigned size)
+{
+FTGMAC100State *s = FTGMAC100(opaque);
+uint64_t val = 0;
+
+switch (addr) {
+case FTGMAC100_NPTXR_BADR_HIGH:
+val = extract64(s->tx_ring, 32, 32);
+break;
+case FTGMAC100_HPTXR_BADR_HIGH:
+/* High Priority Transmit Ring Base High Address */
+qemu_log_mask(LOG_UNIMP, "%s: read to unimplemented register 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+case FTGMAC100_RXR_BADR_HIGH:
+val = extract64(s->rx_ring, 32, 32);
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+}
+
+return val;
+}
+
+static void ftgmac100_high_write(void *opaque, hwaddr addr,
+  uint64_t value, unsigned size)
+{
+FTGMAC100State *s = FTGMAC100(opaque);
+
+switch (addr) {
+case FTGMAC100_NPTXR_BADR_HIGH:
+s->tx_ring = deposit64(s->tx_ring, 32, 32, value);
+s->tx_descriptor = deposit64(s->tx_descriptor, 32, 32, value);
+break;
+case FTGMAC100_HPTXR_BADR_HIGH:
+/* High Priority Transmit Ring Base High Address */
+qemu_log_mask(LOG_UNIMP, "%s: write to unimplemented register 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+case FTGMAC100_RXR_BADR_HIGH:
+s->rx_ring = deposit64(s->rx_ring, 32, 32, value);
+s->rx_descriptor = deposit64(s->rx_descriptor, 32, 32, value);
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+}
+
+ftgmac100_update_irq(s);
+}
+
 static int ftgmac100_filter(FTGMAC100State *s, const uint8_t *buf, size_t len)
 {
 unsigned mcast_idx;
@@ -1077,6 +1141,14 @@ static const MemoryRegionOps ftgmac100_ops = {
 .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static const MemoryRegionOps ftgmac100_high_ops = {
+.read = ftgmac100_high_read,
+.write = ftgmac100_high_write,
+.valid.min_access_size = 4,
+.valid.max_access_size = 4,
+.endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static void ftgmac100_cleanup(NetClientState *nc)
 {
 FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc));
@@ -1114,6 +1186,15 @@ static void ftgmac100_realize(DeviceState *dev, Error 
**errp)
   TYPE_FTGMAC100 ".regs", FTGMAC100_REG_MEM_SIZE);
 memory_region_add_subregion(&s->iomem_container, 0x0, &s->iomem);
 
+if (s->dma64) {
+memory_region_init_io(&s->iomem_high, OBJECT(s), &ftgmac100_high_ops,
+  s, TYPE_FTGMAC100 ".regs.high",
+  FTGMAC100_REG_HIGH_MEM_SIZE);
+memory_region_add_subregion(&s->iomem_container,
+

[PATCH v3 1/8] hw/net:ftgmac100: update memory region size to 64KB

2024-07-04 Thread Jamin Lin via

According to the datasheet of ASPEED SOCs,
one MAC controller owns 128KB of register space for AST2500.
However, one MAC controller only owns 64KB of register space for AST2600
and AST2700. It set the memory region size 128KB and it occupied another
controllers Address Spaces.

Update one MAC controller memory region size to 0x1000
because AST2500 did not use register spaces over than 64KB.

Introduce a new container region size to 0x1000 and its range
is from 0 to 0xfff. This container is mapped a sub region
for the current set of register.
This sub region range is from 0 to 0xff.

Signed-off-by: Jamin Lin 
---
 hw/net/ftgmac100.c | 11 ---
 include/hw/net/ftgmac100.h |  4 
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 25e4c0cd5b..9e1f12cd33 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -1107,9 +1107,14 @@ static void ftgmac100_realize(DeviceState *dev, Error 
**errp)
 s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR;
 }
 
-memory_region_init_io(&s->iomem, OBJECT(dev), &ftgmac100_ops, s,
-  TYPE_FTGMAC100, 0x2000);
-sysbus_init_mmio(sbd, &s->iomem);
+memory_region_init(&s->iomem_container, OBJECT(s),
+   TYPE_FTGMAC100 ".container", FTGMAC100_MEM_SIZE);
+sysbus_init_mmio(sbd, &s->iomem_container);
+
+memory_region_init_io(&s->iomem, OBJECT(s), &ftgmac100_ops, s,
+  TYPE_FTGMAC100 ".regs", FTGMAC100_REG_MEM_SIZE);
+memory_region_add_subregion(&s->iomem_container, 0x0, &s->iomem);
+
 sysbus_init_irq(sbd, &s->irq);
 qemu_macaddr_default_if_unset(&s->conf.macaddr);
 
diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h
index 765d1538a4..269446e858 100644
--- a/include/hw/net/ftgmac100.h
+++ b/include/hw/net/ftgmac100.h
@@ -14,6 +14,9 @@
 #define TYPE_FTGMAC100 "ftgmac100"
 OBJECT_DECLARE_SIMPLE_TYPE(FTGMAC100State, FTGMAC100)
 
+#define FTGMAC100_MEM_SIZE 0x1000
+#define FTGMAC100_REG_MEM_SIZE 0x100
+
 #include "hw/sysbus.h"
 #include "net/net.h"
 
@@ -30,6 +33,7 @@ struct FTGMAC100State {
 NICState *nic;
 NICConf conf;
 qemu_irq irq;
+MemoryRegion iomem_container;
 MemoryRegion iomem;
 
 uint8_t frame[FTGMAC100_MAX_FRAME_SIZE];
-- 
2.34.1

[PATCH v3 2/8] hw/net:ftgmac100: update ring base address to 64 bits

2024-07-04 Thread Jamin Lin via

Update TX and RX ring base address data type to uint64_t for
64 bits dram address DMA support.

Both "Normal Priority Transmit Ring Base Address Register(0x20)" and
"Receive Ring Base Address Register (0x24)" are used for saving the
low part physical address of descriptor manager.

Therefore, changes to set TX and RX descriptor manager address bits [31:0]
in ftgmac100_read and ftgmac100_write functions.

Incrementing the version of vmstate to 2.

Signed-off-by: Jamin Lin 
---
 hw/net/ftgmac100.c | 33 -
 include/hw/net/ftgmac100.h |  9 -
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 9e1f12cd33..d026242e2b 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -515,12 +515,12 @@ out:
 return frame_size;
 }
 
-static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring,
-uint32_t tx_descriptor)
+static void ftgmac100_do_tx(FTGMAC100State *s, uint64_t tx_ring,
+uint64_t tx_descriptor)
 {
 int frame_size = 0;
 uint8_t *ptr = s->frame;
-uint32_t addr = tx_descriptor;
+uint64_t addr = tx_descriptor;
 uint32_t flags = 0;
 
 while (1) {
@@ -726,9 +726,9 @@ static uint64_t ftgmac100_read(void *opaque, hwaddr addr, 
unsigned size)
 case FTGMAC100_MATH1:
 return s->math[1];
 case FTGMAC100_RXR_BADR:
-return s->rx_ring;
+return extract64(s->rx_ring, 0, 32);
 case FTGMAC100_NPTXR_BADR:
-return s->tx_ring;
+return extract64(s->tx_ring, 0, 32);
 case FTGMAC100_ITC:
 return s->itc;
 case FTGMAC100_DBLAC:
@@ -799,9 +799,8 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
   HWADDR_PRIx "\n", __func__, value);
 return;
 }
-
-s->rx_ring = value;
-s->rx_descriptor = s->rx_ring;
+s->rx_ring = deposit64(s->rx_ring, 0, 32, value);
+s->rx_descriptor = deposit64(s->rx_descriptor, 0, 32, value);
 break;
 
 case FTGMAC100_RBSR: /* DMA buffer size */
@@ -814,8 +813,8 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
   HWADDR_PRIx "\n", __func__, value);
 return;
 }
-s->tx_ring = value;
-s->tx_descriptor = s->tx_ring;
+s->tx_ring = deposit64(s->tx_ring, 0, 32, value);
+s->tx_descriptor = deposit64(s->tx_descriptor, 0, 32, value);
 break;
 
 case FTGMAC100_NPTXPD: /* Trigger transmit */
@@ -957,7 +956,7 @@ static ssize_t ftgmac100_receive(NetClientState *nc, const 
uint8_t *buf,
 FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc));
 FTGMAC100Desc bd;
 uint32_t flags = 0;
-uint32_t addr;
+uint64_t addr;
 uint32_t crc;
 uint32_t buf_addr;
 uint8_t *crc_ptr;
@@ -1126,18 +1125,14 @@ static void ftgmac100_realize(DeviceState *dev, Error 
**errp)
 
 static const VMStateDescription vmstate_ftgmac100 = {
 .name = TYPE_FTGMAC100,
-.version_id = 1,
-.minimum_version_id = 1,
+.version_id = 2,
+.minimum_version_id = 2,
 .fields = (const VMStateField[]) {
 VMSTATE_UINT32(irq_state, FTGMAC100State),
 VMSTATE_UINT32(isr, FTGMAC100State),
 VMSTATE_UINT32(ier, FTGMAC100State),
 VMSTATE_UINT32(rx_enabled, FTGMAC100State),
-VMSTATE_UINT32(rx_ring, FTGMAC100State),
 VMSTATE_UINT32(rbsr, FTGMAC100State),
-VMSTATE_UINT32(tx_ring, FTGMAC100State),
-VMSTATE_UINT32(rx_descriptor, FTGMAC100State),
-VMSTATE_UINT32(tx_descriptor, FTGMAC100State),
 VMSTATE_UINT32_ARRAY(math, FTGMAC100State, 2),
 VMSTATE_UINT32(itc, FTGMAC100State),
 VMSTATE_UINT32(aptcr, FTGMAC100State),
@@ -1156,6 +1151,10 @@ static const VMStateDescription vmstate_ftgmac100 = {
 VMSTATE_UINT32(phy_int_mask, FTGMAC100State),
 VMSTATE_UINT32(txdes0_edotr, FTGMAC100State),
 VMSTATE_UINT32(rxdes0_edorr, FTGMAC100State),
+VMSTATE_UINT64(rx_ring, FTGMAC100State),
+VMSTATE_UINT64(tx_ring, FTGMAC100State),
+VMSTATE_UINT64(rx_descriptor, FTGMAC100State),
+VMSTATE_UINT64(tx_descriptor, FTGMAC100State),
 VMSTATE_END_OF_LIST()
 }
 };
diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h
index 269446e858..aae57ae8cb 100644
--- a/include/hw/net/ftgmac100.h
+++ b/include/hw/net/ftgmac100.h
@@ -42,10 +42,6 @@ struct FTGMAC100State {
 uint32_t isr;
 uint32_t ier;
 uint32_t rx_enabled;
-uint32_t rx_ring;
-uint32_t rx_descriptor;
-uint32_t tx_ring;
-uint32_t tx_descriptor;
 uint32_t math[2];
 uint32_t rbsr;
 uint32_t itc;
@@ -58,7 +54,10 @@ struct FTGMAC100State {
 uint32_t phycr;
 uint32_t phydata;
 uint32_t fcr;
-
+uint64_t rx_ring;
+uint64_t rx_descriptor;
+uint64_t tx_ring;
+uint64_t tx_descriptor;
 
 uint32_t phy_status;
 uint32_t phy_control;

[PATCH v3 7/8] machine_aspeed.py: update to test ASPEED OpenBMC SDK v09.02 for AST2700

2024-07-04 Thread Jamin Lin via

Update test case to test ASPEED OpenBMC SDK v09.02 for AST2700.

ASPEED fixed TX mask issue from linux/drivers/ftgmac100.c.
It is required to use ASPEED OpenBMC SDK since v09.02
for AST2700 QEMU network testing.

A test image is downloaded from the ASPEED Forked OpenBMC GitHub
release repository :
https://github.com/AspeedTech-BMC/openbmc/releases/

Signed-off-by: Jamin Lin 
---
 tests/avocado/machine_aspeed.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 3a20644fb2..13fe128fc9 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -387,15 +387,15 @@ def test_arm_ast2600_evb_sdk(self):
 year = time.strftime("%Y")
 self.ssh_command_output_contains('/sbin/hwclock -f /dev/rtc1', year);
 
-def test_aarch64_ast2700_evb_sdk_v09_01(self):
+def test_aarch64_ast2700_evb_sdk_v09_02(self):
 """
 :avocado: tags=arch:aarch64
 :avocado: tags=machine:ast2700-evb
 """
 
 image_url = ('https://github.com/AspeedTech-BMC/openbmc/releases/'
- 'download/v09.01/ast2700-default-obmc.tar.gz')
-image_hash = 
'b1cc0fd73c7650d34c9c8459a243f52a91e9e27144b8608b2645ab19461d1e07'
+ 'download/v09.02/ast2700-default-obmc.tar.gz')
+image_hash = 
'ac969c2602f4e6bdb69562ff466b89ae3fe1d86e1f6797bb7969d787f82116a7'
 image_path = self.fetch_asset(image_url, asset_hash=image_hash,
   algorithm='sha256')
 archive.extract(image_path, self.workdir)
-- 
2.34.1

[PATCH v3 8/8] machine_aspeed.py: update to test network for AST2700

2024-07-04 Thread Jamin Lin via

Update test case to test network connection via SSH.

Test command:
```
cd build
pyvenv/bin/avocado run 
../qemu/tests/avocado/machine_aspeed.py:AST2x00MachineSDK.test_aarch64_ast2700_evb_sdk_v09_02
```

Signed-off-by: Jamin Lin 
---
 tests/avocado/machine_aspeed.py | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 13fe128fc9..f66ad38d35 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -313,14 +313,14 @@ def do_test_arm_aspeed_sdk_start(self, image):
 
 def do_test_aarch64_aspeed_sdk_start(self, image):
 self.vm.set_console()
-self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw')
+self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
+ '-net', 'nic', '-net', 
'user,hostfwd=:127.0.0.1:0-:22')
 
 self.vm.launch()
 
 self.wait_for_console_pattern('U-Boot 2023.10')
 self.wait_for_console_pattern('## Loading kernel from FIT Image')
 self.wait_for_console_pattern('Starting kernel ...')
-self.wait_for_console_pattern("systemd[1]: Hostname set to")
 
 @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test is unstable on 
GitLab')
 
@@ -436,4 +436,6 @@ def test_aarch64_ast2700_evb_sdk_v09_02(self):
 
 self.vm.add_args('-smp', str(num_cpu))
 self.do_test_aarch64_aspeed_sdk_start(image_dir + 'image-bmc')
+self.wait_for_console_pattern('nodistro.0 ast2700-default ttyS12')
+self.ssh_connect('root', '0penBmc', False)
 
-- 
2.34.1

[PATCH v3 4/8] hw/net:ftgmac100: update TX and RX packet buffers address to 64 bits

2024-07-04 Thread Jamin Lin via

ASPEED AST2700 SOC is a 64 bits quad core CPUs (Cortex-a35)
And the base address of dram is "0x4 " which
is 64bits address.

It have "TXDES 2" and "RXDES 2" to save the high part
physical address of packet buffer.
Ex: TX packet buffer address [34:0]
The "TXDES 2" bits [18:16] which corresponds the bits [34:32]
of the 64 bits address of the TX packet buffer address
and "TXDES 3" bits [31:0] which corresponds the bits [31:0]
of the 64 bits address of the TX packet buffer address.

Update TX and RX packet buffers address type to
64 bits for dram 64 bits address DMA support.

Signed-off-by: Jamin Lin 
---
 hw/net/ftgmac100.c | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 68956aeb94..80f9cd56d5 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -175,6 +175,8 @@
 #define FTGMAC100_TXDES1_TX2FIC  (1 << 30)
 #define FTGMAC100_TXDES1_TXIC(1 << 31)
 
+#define FTGMAC100_TXDES2_TXBUF_BADR_HI(x)   (((x) >> 16) & 0x7)
+
 /*
  * Receive descriptor
  */
@@ -208,13 +210,15 @@
 #define FTGMAC100_RXDES1_UDP_CHKSUM_ERR  (1 << 26)
 #define FTGMAC100_RXDES1_IP_CHKSUM_ERR   (1 << 27)
 
+#define FTGMAC100_RXDES2_RXBUF_BADR_HI(x)   (((x) >> 16) & 0x7)
+
 /*
  * Receive and transmit Buffer Descriptor
  */
 typedef struct {
 uint32_tdes0;
 uint32_tdes1;
-uint32_tdes2;/* not used by HW */
+uint32_tdes2;/* used by HW 64 bits DMA */
 uint32_tdes3;
 } FTGMAC100Desc;
 
@@ -531,6 +535,7 @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint64_t 
tx_ring,
 int frame_size = 0;
 uint8_t *ptr = s->frame;
 uint64_t addr = tx_descriptor;
+uint64_t buf_addr = 0;
 uint32_t flags = 0;
 
 while (1) {
@@ -569,7 +574,12 @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint64_t 
tx_ring,
 len =  sizeof(s->frame) - frame_size;
 }
 
-if (dma_memory_read(&address_space_memory, bd.des3,
+buf_addr = bd.des3;
+if (s->dma64) {
+buf_addr = deposit64(buf_addr, 32, 32,
+ FTGMAC100_TXDES2_TXBUF_BADR_HI(bd.des2));
+}
+if (dma_memory_read(&address_space_memory, buf_addr,
 ptr, len, MEMTXATTRS_UNSPECIFIED)) {
 qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to read packet @ 
0x%x\n",
   __func__, bd.des3);
@@ -1022,7 +1032,7 @@ static ssize_t ftgmac100_receive(NetClientState *nc, 
const uint8_t *buf,
 uint32_t flags = 0;
 uint64_t addr;
 uint32_t crc;
-uint32_t buf_addr;
+uint64_t buf_addr = 0;
 uint8_t *crc_ptr;
 uint32_t buf_len;
 size_t size = len;
@@ -1087,7 +1097,12 @@ static ssize_t ftgmac100_receive(NetClientState *nc, 
const uint8_t *buf,
 if (size < 4) {
 buf_len += size - 4;
 }
+
 buf_addr = bd.des3;
+if (s->dma64) {
+buf_addr = deposit64(buf_addr, 32, 32,
+ FTGMAC100_RXDES2_RXBUF_BADR_HI(bd.des2));
+}
 if (first && proto == ETH_P_VLAN && buf_len >= 18) {
 bd.des1 = lduw_be_p(buf + 14) | FTGMAC100_RXDES1_VLANTAG_AVAIL;
 
-- 
2.34.1

[PATCH v3 5/8] aspeed/soc: set dma64 property for AST2700 ftgmac100

2024-07-04 Thread Jamin Lin via

ASPEED AST2700 SOC is a 64 bits quad core CPUs (Cortex-a35)
And the base address of dram is "0x4 " which
is 64bits address.

Set dma64 property for ftgmac100 model to support
64bits dram address DMA.

Signed-off-by: Jamin Lin 
---
 hw/arm/aspeed_ast27x0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
index 18e6a8b10c..a9fb0d4b88 100644
--- a/hw/arm/aspeed_ast27x0.c
+++ b/hw/arm/aspeed_ast27x0.c
@@ -552,9 +552,12 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, 
Error **errp)
 return;
 }
 
+/* Net */
 for (i = 0; i < sc->macs_num; i++) {
 object_property_set_bool(OBJECT(&s->ftgmac100[i]), "aspeed", true,
  &error_abort);
+object_property_set_bool(OBJECT(&s->ftgmac100[i]), "dma64", true,
+ &error_abort);
 if (!sysbus_realize(SYS_BUS_DEVICE(&s->ftgmac100[i]), errp)) {
 return;
 }
-- 
2.34.1

[PATCH 1/4] accel/kvm/kvm-all: Fix superfluous trailing semicolon

2024-07-04 Thread Zhao Liu

Signed-off-by: Zhao Liu 
---
 accel/kvm/kvm-all.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2b4ab896794b..64bf47a03300 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3878,7 +3878,7 @@ static StatsList *add_kvmstat_entry(struct kvm_stats_desc 
*pdesc,
 /* Alloc and populate data list */
 stats = g_new0(Stats, 1);
 stats->name = g_strdup(pdesc->name);
-stats->value = g_new0(StatsValue, 1);;
+stats->value = g_new0(StatsValue, 1);
 
 if ((pdesc->flags & KVM_STATS_UNIT_MASK) == KVM_STATS_UNIT_BOOLEAN) {
 stats->value->u.boolean = *stats_data;
-- 
2.34.1

[PATCH 2/4] hw/i386/x86: Fix superfluous trailing semicolon

2024-07-04 Thread Zhao Liu

Signed-off-by: Zhao Liu 
---
 hw/i386/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index a4aa8e081098..01fc5e656272 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -242,7 +242,7 @@ static void x86_machine_get_pit(Object *obj, Visitor *v, 
const char *name,
 static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name,
 void *opaque, Error **errp)
 {
-X86MachineState *x86ms = X86_MACHINE(obj);;
+X86MachineState *x86ms = X86_MACHINE(obj);
 
 visit_type_OnOffAuto(v, name, &x86ms->pit, errp);
 }
-- 
2.34.1

[PATCH 4/4] target/hexagon/imported/mmvec: Fix superfluous trailing semicolon

2024-07-04 Thread Zhao Liu

Fix the superfluous trailing semicolon in target/hexagon/imported/mmvec/
ext.idef.

Cc: Brian Cain 
Signed-off-by: Zhao Liu 
---
 target/hexagon/imported/mmvec/ext.idef | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/hexagon/imported/mmvec/ext.idef 
b/target/hexagon/imported/mmvec/ext.idef
index 98daabfb07c4..03d31f6181d7 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -2855,7 +2855,7 @@ EXTINSN(V6_vscattermhw_add,  
"vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32", ATTRIBS(A_EXT
 fVALIGN(RtV, element_size);
 fVFOREACH(32, i) {
 for(j = 0; j < 2; j++) {
- EA =  RtV + fVALIGN(VvvV.v[j].uw[i],ALIGNMENT);;
+ EA =  RtV + fVALIGN(VvvV.v[j].uw[i],ALIGNMENT);
  
fVLOG_VTCM_HALFWORD_INCREMENT_DV(EA,VvvV.v[j].uw[i],VwV,(2*i+j),i,j,ALIGNMENT,MuV);
 }
 }
-- 
2.34.1

[PATCH 3/4] util/oslib-posix: Fix superfluous trailing semicolon

2024-07-04 Thread Zhao Liu

Signed-off-by: Zhao Liu 
---
 util/oslib-posix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index e76441695bdc..b090fe0eed0d 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -263,7 +263,7 @@ int qemu_socketpair(int domain, int type, int protocol, int 
sv[2])
 return ret;
 }
 #endif
-ret = socketpair(domain, type, protocol, sv);;
+ret = socketpair(domain, type, protocol, sv);
 if (ret == 0) {
 qemu_set_cloexec(sv[0]);
 qemu_set_cloexec(sv[1]);
-- 
2.34.1

[PATCH 0/4] trivial: Fix superfluous trailing semicolon

2024-07-04 Thread Zhao Liu

Hi,

I checked the files in QEMU to fix these few errors about "superfluous
trailing semicolon" to honor the requirement in checkpatch.pl.

Thanks and Best Regards,
Zhao
---
Zhao Liu (4):
  accel/kvm/kvm-all: Fix superfluous trailing semicolon
  hw/i386/x86: Fix superfluous trailing semicolon
  util/oslib-posix: Fix superfluous trailing semicolon
  target/hexagon/imported/mmvec: Fix superfluous trailing semicolon

 accel/kvm/kvm-all.c| 2 +-
 hw/i386/x86.c  | 2 +-
 target/hexagon/imported/mmvec/ext.idef | 2 +-
 util/oslib-posix.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

-- 
2.34.1

Re: [PATCH] i386/sev: Don't allow automatic fallback to legacy KVM_SEV*_INIT

2024-07-04 Thread Daniel P . Berrangé

On Thu, Jul 04, 2024 at 08:51:05AM +0200, Paolo Bonzini wrote:
> On Thu, Jul 4, 2024 at 2:01 AM Michael Roth  wrote:
> > Currently if the 'legacy-vm-type' property of the sev-guest object is
> > left unset, QEMU will attempt to use the newer KVM_SEV_INIT2 kernel
> > interface in conjunction with the newer KVM_X86_SEV_VM and
> > KVM_X86_SEV_ES_VM KVM VM types.
> >
> > This can lead to measurement changes if, for instance, an SEV guest was
> > created on a host that originally had an older kernel that didn't
> > support KVM_SEV_INIT2, but is booted on the same host later on after the
> > host kernel was upgraded.
> 
> I think this is the right thing to do for SEV-ES. I agree that it's
> bad to require a very new kernel (6.10 will be released only a month
> before QEMU 9.1), on the other hand the KVM_SEV_ES_INIT API is broken
> in several ways. As long as there is a way to go back to it, and it's
> not changed by old machine types, not using it for SEV-ES is the
> better choice for upstream.

Broken how ?   I know there was the regression with the 'debug_swap'
parameter, but was something that should just be fixed in the kernel,
rather than breaking userspace. What else is a problem ?

I don't think its reasonable for QEMU to require a brand new kernel
for new machine types, given SEV & SEV-ES have been deployed for
many years already. 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v3 1/8] hw/net:ftgmac100: update memory region size to 64KB

2024-07-04 Thread Cédric Le Goater


On 7/4/24 10:29 AM, Jamin Lin wrote:

According to the datasheet of ASPEED SOCs,
one MAC controller owns 128KB of register space for AST2500.
However, one MAC controller only owns 64KB of register space for AST2600
and AST2700. It set the memory region size 128KB and it occupied another
controllers Address Spaces.

Update one MAC controller memory region size to 0x1000
because AST2500 did not use register spaces over than 64KB.

Introduce a new container region size to 0x1000 and its range
is from 0 to 0xfff. This container is mapped a sub region
for the current set of register.
This sub region range is from 0 to 0xff.

Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/net/ftgmac100.c | 11 ---
  include/hw/net/ftgmac100.h |  4 
  2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 25e4c0cd5b..9e1f12cd33 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -1107,9 +1107,14 @@ static void ftgmac100_realize(DeviceState *dev, Error 
**errp)
  s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR;
  }
  
-memory_region_init_io(&s->iomem, OBJECT(dev), &ftgmac100_ops, s,

-  TYPE_FTGMAC100, 0x2000);
-sysbus_init_mmio(sbd, &s->iomem);
+memory_region_init(&s->iomem_container, OBJECT(s),
+   TYPE_FTGMAC100 ".container", FTGMAC100_MEM_SIZE);
+sysbus_init_mmio(sbd, &s->iomem_container);
+
+memory_region_init_io(&s->iomem, OBJECT(s), &ftgmac100_ops, s,
+  TYPE_FTGMAC100 ".regs", FTGMAC100_REG_MEM_SIZE);
+memory_region_add_subregion(&s->iomem_container, 0x0, &s->iomem);
+
  sysbus_init_irq(sbd, &s->irq);
  qemu_macaddr_default_if_unset(&s->conf.macaddr);
  
diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h

index 765d1538a4..269446e858 100644
--- a/include/hw/net/ftgmac100.h
+++ b/include/hw/net/ftgmac100.h
@@ -14,6 +14,9 @@
  #define TYPE_FTGMAC100 "ftgmac100"
  OBJECT_DECLARE_SIMPLE_TYPE(FTGMAC100State, FTGMAC100)
  
+#define FTGMAC100_MEM_SIZE 0x1000

+#define FTGMAC100_REG_MEM_SIZE 0x100
+
  #include "hw/sysbus.h"
  #include "net/net.h"
  
@@ -30,6 +33,7 @@ struct FTGMAC100State {

  NICState *nic;
  NICConf conf;
  qemu_irq irq;
+MemoryRegion iomem_container;
  MemoryRegion iomem;
  
  uint8_t frame[FTGMAC100_MAX_FRAME_SIZE];

Re: [PATCH v4 30/31] i386/kvm: Add KVM_EXIT_HYPERCALL handling for KVM_HC_MAP_GPA_RANGE

2024-07-04 Thread Binbin Wu





On 5/30/2024 7:16 PM, Pankaj Gupta wrote:

[...]

+/*
+ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE
+ * to service guest-initiated memory attribute update requests so that
+ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be
+ * backed by the private memory pool provided by guest_memfd, and as such
+ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX).
+ *
+ * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live

   ^
   extra "other"?

+ * migration, are not implemented here currently.
+ *
+ * For the guest_memfd use-case, these exits will generally be synthesized
+ * by KVM based on platform-specific hypercalls, like GHCB requests in the
+ * case of SEV-SNP, and not issued directly within the guest though the
+ * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is
+ * not actually advertised to guests via the KVM CPUID feature bit, as
+ * opposed to SEV live migration where it would be. Since it is unlikely the
+ * SEV live migration use-case would be useful for guest-memfd backed guests,
+ * because private/shared page tracking is already provided through other
+ * means, these 2 use-cases should be treated as being mutually-exclusive.
+ */
+static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
+{
+uint64_t gpa, size, attributes;
+
+if (!machine_require_guest_memfd(current_machine))
+return -EINVAL;
+
+gpa = run->hypercall.args[0];
+size = run->hypercall.args[1] * TARGET_PAGE_SIZE;
+attributes = run->hypercall.args[2];
+
+trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
+
+return kvm_convert_memory(gpa, size, attributes & 
KVM_MAP_GPA_RANGE_ENCRYPTED);


run->hypercall.ret should be updated accordingly.
At least for successful case.
For failure case, QEMU will shutdown the VM, is it the expected behavior?



+}
+
+static int kvm_handle_hypercall(struct kvm_run *run)
+{
+if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
+return kvm_handle_hc_map_gpa_range(run);
+
+return -EINVAL;
+}
+


[...]

Re: [PATCH v3 3/8] hw/net:ftgmac100: introduce TX and RX ring base address high registers to support 64 bits

2024-07-04 Thread Cédric Le Goater


On 7/4/24 10:29 AM, Jamin Lin wrote:

ASPEED AST2700 SOC is a 64 bits quad core CPUs (Cortex-a35)
And the base address of dram is "0x4 " which
is 64bits address.

It have "Normal Priority Transmit Ring Base Address Register High(0x17C)",
"High Priority Transmit Ring Base Address Register High(0x184)" and
"Receive Ring Base Address Register High(0x18C)" to save the high part physical
address of descriptor manager.
Ex: TX descriptor manager address [34:0]
The "Normal Priority Transmit Ring Base Address Register High(0x17C)"
bits [2:0] which corresponds the bits [34:32] of the 64 bits address of
the TX ring buffer address.
The "Normal Priority Transmit Ring Base Address Register(0x20)" bits [31:0]
which corresponds the bits [31:0] of the 64 bits address
of the TX ring buffer address.

Introduce a new sub region which size is 0x100 for the set of new registers
and map it at 0x100 in the container region.
This sub region range is from 0x100 to 0x1ff.

Introduce a new property and object attribute to activate the region for new 
registers.
Introduce a new memop handlers for the new register read and write.

Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/net/ftgmac100.c | 82 ++
  include/hw/net/ftgmac100.h |  4 ++
  2 files changed, 86 insertions(+)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index d026242e2b..68956aeb94 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -56,6 +56,16 @@
  #define FTGMAC100_PHYDATA 0x64
  #define FTGMAC100_FCR 0x68
  
+/*

+ * FTGMAC100 registers high
+ *
+ * values below are offset by - FTGMAC100_REG_HIGH_OFFSET from datasheet
+ * because its memory region is start at FTGMAC100_REG_HIGH_OFFSET
+ */
+#define FTGMAC100_NPTXR_BADR_HIGH   (0x17C - FTGMAC100_REG_HIGH_OFFSET)
+#define FTGMAC100_HPTXR_BADR_HIGH   (0x184 - FTGMAC100_REG_HIGH_OFFSET)
+#define FTGMAC100_RXR_BADR_HIGH (0x18C - FTGMAC100_REG_HIGH_OFFSET)
+
  /*
   * Interrupt status register & interrupt enable register
   */
@@ -913,6 +923,60 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
  ftgmac100_update_irq(s);
  }
  
+static uint64_t ftgmac100_high_read(void *opaque, hwaddr addr, unsigned size)

+{
+FTGMAC100State *s = FTGMAC100(opaque);
+uint64_t val = 0;
+
+switch (addr) {
+case FTGMAC100_NPTXR_BADR_HIGH:
+val = extract64(s->tx_ring, 32, 32);
+break;
+case FTGMAC100_HPTXR_BADR_HIGH:
+/* High Priority Transmit Ring Base High Address */
+qemu_log_mask(LOG_UNIMP, "%s: read to unimplemented register 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+case FTGMAC100_RXR_BADR_HIGH:
+val = extract64(s->rx_ring, 32, 32);
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+}
+
+return val;
+}
+
+static void ftgmac100_high_write(void *opaque, hwaddr addr,
+  uint64_t value, unsigned size)
+{
+FTGMAC100State *s = FTGMAC100(opaque);
+
+switch (addr) {
+case FTGMAC100_NPTXR_BADR_HIGH:
+s->tx_ring = deposit64(s->tx_ring, 32, 32, value);
+s->tx_descriptor = deposit64(s->tx_descriptor, 32, 32, value);
+break;
+case FTGMAC100_HPTXR_BADR_HIGH:
+/* High Priority Transmit Ring Base High Address */
+qemu_log_mask(LOG_UNIMP, "%s: write to unimplemented register 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+case FTGMAC100_RXR_BADR_HIGH:
+s->rx_ring = deposit64(s->rx_ring, 32, 32, value);
+s->rx_descriptor = deposit64(s->rx_descriptor, 32, 32, value);
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+break;
+}
+
+ftgmac100_update_irq(s);
+}
+
  static int ftgmac100_filter(FTGMAC100State *s, const uint8_t *buf, size_t len)
  {
  unsigned mcast_idx;
@@ -1077,6 +1141,14 @@ static const MemoryRegionOps ftgmac100_ops = {
  .endianness = DEVICE_LITTLE_ENDIAN,
  };
  
+static const MemoryRegionOps ftgmac100_high_ops = {

+.read = ftgmac100_high_read,
+.write = ftgmac100_high_write,
+.valid.min_access_size = 4,
+.valid.max_access_size = 4,
+.endianness = DEVICE_LITTLE_ENDIAN,
+};
+
  static void ftgmac100_cleanup(NetClientState *nc)
  {
  FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc));
@@ -1114,6 +1186,15 @@ static void ftgmac100_realize(DeviceState *dev, Error 
**errp)
TYPE_FTGMAC100 ".regs", FTGMAC100_REG_MEM_SIZE);
  memory_region_add_subregion(&s->iomem_container, 0x0, &s->iomem);
  
+if (s->dma64) {

+memory_region_init_io(&s->iomem_high, OBJECT(s), &ftgmac100_high_ops,
+  s, TYPE_FTGMAC100 ".regs.high",
+

Re: [PATCH v3 5/8] aspeed/soc: set dma64 property for AST2700 ftgmac100

2024-07-04 Thread Cédric Le Goater


On 7/4/24 10:29 AM, Jamin Lin wrote:

ASPEED AST2700 SOC is a 64 bits quad core CPUs (Cortex-a35)
And the base address of dram is "0x4 " which
is 64bits address.

Set dma64 property for ftgmac100 model to support
64bits dram address DMA.

Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/arm/aspeed_ast27x0.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
index 18e6a8b10c..a9fb0d4b88 100644
--- a/hw/arm/aspeed_ast27x0.c
+++ b/hw/arm/aspeed_ast27x0.c
@@ -552,9 +552,12 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, 
Error **errp)
  return;
  }
  
+/* Net */

  for (i = 0; i < sc->macs_num; i++) {
  object_property_set_bool(OBJECT(&s->ftgmac100[i]), "aspeed", true,
   &error_abort);
+object_property_set_bool(OBJECT(&s->ftgmac100[i]), "dma64", true,
+ &error_abort);
  if (!sysbus_realize(SYS_BUS_DEVICE(&s->ftgmac100[i]), errp)) {
  return;
  }

Re: [PATCH v3 7/8] machine_aspeed.py: update to test ASPEED OpenBMC SDK v09.02 for AST2700

2024-07-04 Thread Cédric Le Goater


On 7/4/24 10:29 AM, Jamin Lin wrote:

Update test case to test ASPEED OpenBMC SDK v09.02 for AST2700.

ASPEED fixed TX mask issue from linux/drivers/ftgmac100.c.
It is required to use ASPEED OpenBMC SDK since v09.02
for AST2700 QEMU network testing.

A test image is downloaded from the ASPEED Forked OpenBMC GitHub
release repository :
https://github.com/AspeedTech-BMC/openbmc/releases/

Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  tests/avocado/machine_aspeed.py | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 3a20644fb2..13fe128fc9 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -387,15 +387,15 @@ def test_arm_ast2600_evb_sdk(self):
  year = time.strftime("%Y")
  self.ssh_command_output_contains('/sbin/hwclock -f /dev/rtc1', year);
  
-def test_aarch64_ast2700_evb_sdk_v09_01(self):

+def test_aarch64_ast2700_evb_sdk_v09_02(self):
  """
  :avocado: tags=arch:aarch64
  :avocado: tags=machine:ast2700-evb
  """
  
  image_url = ('https://github.com/AspeedTech-BMC/openbmc/releases/'

- 'download/v09.01/ast2700-default-obmc.tar.gz')
-image_hash = 
'b1cc0fd73c7650d34c9c8459a243f52a91e9e27144b8608b2645ab19461d1e07'
+ 'download/v09.02/ast2700-default-obmc.tar.gz')
+image_hash = 
'ac969c2602f4e6bdb69562ff466b89ae3fe1d86e1f6797bb7969d787f82116a7'
  image_path = self.fetch_asset(image_url, asset_hash=image_hash,
algorithm='sha256')
  archive.extract(image_path, self.workdir)

Re: [PATCH v3 8/8] machine_aspeed.py: update to test network for AST2700

2024-07-04 Thread Cédric Le Goater


On 7/4/24 10:29 AM, Jamin Lin wrote:

Update test case to test network connection via SSH.

Test command:
```
cd build
pyvenv/bin/avocado run 
../qemu/tests/avocado/machine_aspeed.py:AST2x00MachineSDK.test_aarch64_ast2700_evb_sdk_v09_02
```

Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  tests/avocado/machine_aspeed.py | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 13fe128fc9..f66ad38d35 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -313,14 +313,14 @@ def do_test_arm_aspeed_sdk_start(self, image):
  
  def do_test_aarch64_aspeed_sdk_start(self, image):

  self.vm.set_console()
-self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw')
+self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
+ '-net', 'nic', '-net', 
'user,hostfwd=:127.0.0.1:0-:22')
  
  self.vm.launch()
  
  self.wait_for_console_pattern('U-Boot 2023.10')

  self.wait_for_console_pattern('## Loading kernel from FIT Image')
  self.wait_for_console_pattern('Starting kernel ...')
-self.wait_for_console_pattern("systemd[1]: Hostname set to")
  
  @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test is unstable on GitLab')
  
@@ -436,4 +436,6 @@ def test_aarch64_ast2700_evb_sdk_v09_02(self):
  
  self.vm.add_args('-smp', str(num_cpu))

  self.do_test_aarch64_aspeed_sdk_start(image_dir + 'image-bmc')
+self.wait_for_console_pattern('nodistro.0 ast2700-default ttyS12')
+self.ssh_connect('root', '0penBmc', False)

Re: [PATCH v3 2/8] hw/net:ftgmac100: update ring base address to 64 bits

2024-07-04 Thread Cédric Le Goater


On 7/4/24 10:29 AM, Jamin Lin wrote:

Update TX and RX ring base address data type to uint64_t for
64 bits dram address DMA support.

Both "Normal Priority Transmit Ring Base Address Register(0x20)" and
"Receive Ring Base Address Register (0x24)" are used for saving the
low part physical address of descriptor manager.

Therefore, changes to set TX and RX descriptor manager address bits [31:0]
in ftgmac100_read and ftgmac100_write functions.

Incrementing the version of vmstate to 2.

Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/net/ftgmac100.c | 33 -
  include/hw/net/ftgmac100.h |  9 -
  2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 9e1f12cd33..d026242e2b 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -515,12 +515,12 @@ out:
  return frame_size;
  }
  
-static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring,

-uint32_t tx_descriptor)
+static void ftgmac100_do_tx(FTGMAC100State *s, uint64_t tx_ring,
+uint64_t tx_descriptor)
  {
  int frame_size = 0;
  uint8_t *ptr = s->frame;
-uint32_t addr = tx_descriptor;
+uint64_t addr = tx_descriptor;
  uint32_t flags = 0;
  
  while (1) {

@@ -726,9 +726,9 @@ static uint64_t ftgmac100_read(void *opaque, hwaddr addr, 
unsigned size)
  case FTGMAC100_MATH1:
  return s->math[1];
  case FTGMAC100_RXR_BADR:
-return s->rx_ring;
+return extract64(s->rx_ring, 0, 32);
  case FTGMAC100_NPTXR_BADR:
-return s->tx_ring;
+return extract64(s->tx_ring, 0, 32);
  case FTGMAC100_ITC:
  return s->itc;
  case FTGMAC100_DBLAC:
@@ -799,9 +799,8 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
HWADDR_PRIx "\n", __func__, value);
  return;
  }
-
-s->rx_ring = value;
-s->rx_descriptor = s->rx_ring;
+s->rx_ring = deposit64(s->rx_ring, 0, 32, value);
+s->rx_descriptor = deposit64(s->rx_descriptor, 0, 32, value);
  break;
  
  case FTGMAC100_RBSR: /* DMA buffer size */

@@ -814,8 +813,8 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
HWADDR_PRIx "\n", __func__, value);
  return;
  }
-s->tx_ring = value;
-s->tx_descriptor = s->tx_ring;
+s->tx_ring = deposit64(s->tx_ring, 0, 32, value);
+s->tx_descriptor = deposit64(s->tx_descriptor, 0, 32, value);
  break;
  
  case FTGMAC100_NPTXPD: /* Trigger transmit */

@@ -957,7 +956,7 @@ static ssize_t ftgmac100_receive(NetClientState *nc, const 
uint8_t *buf,
  FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc));
  FTGMAC100Desc bd;
  uint32_t flags = 0;
-uint32_t addr;
+uint64_t addr;
  uint32_t crc;
  uint32_t buf_addr;
  uint8_t *crc_ptr;
@@ -1126,18 +1125,14 @@ static void ftgmac100_realize(DeviceState *dev, Error 
**errp)
  
  static const VMStateDescription vmstate_ftgmac100 = {

  .name = TYPE_FTGMAC100,
-.version_id = 1,
-.minimum_version_id = 1,
+.version_id = 2,
+.minimum_version_id = 2,
  .fields = (const VMStateField[]) {
  VMSTATE_UINT32(irq_state, FTGMAC100State),
  VMSTATE_UINT32(isr, FTGMAC100State),
  VMSTATE_UINT32(ier, FTGMAC100State),
  VMSTATE_UINT32(rx_enabled, FTGMAC100State),
-VMSTATE_UINT32(rx_ring, FTGMAC100State),
  VMSTATE_UINT32(rbsr, FTGMAC100State),
-VMSTATE_UINT32(tx_ring, FTGMAC100State),
-VMSTATE_UINT32(rx_descriptor, FTGMAC100State),
-VMSTATE_UINT32(tx_descriptor, FTGMAC100State),
  VMSTATE_UINT32_ARRAY(math, FTGMAC100State, 2),
  VMSTATE_UINT32(itc, FTGMAC100State),
  VMSTATE_UINT32(aptcr, FTGMAC100State),
@@ -1156,6 +1151,10 @@ static const VMStateDescription vmstate_ftgmac100 = {
  VMSTATE_UINT32(phy_int_mask, FTGMAC100State),
  VMSTATE_UINT32(txdes0_edotr, FTGMAC100State),
  VMSTATE_UINT32(rxdes0_edorr, FTGMAC100State),
+VMSTATE_UINT64(rx_ring, FTGMAC100State),
+VMSTATE_UINT64(tx_ring, FTGMAC100State),
+VMSTATE_UINT64(rx_descriptor, FTGMAC100State),
+VMSTATE_UINT64(tx_descriptor, FTGMAC100State),
  VMSTATE_END_OF_LIST()
  }
  };
diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h
index 269446e858..aae57ae8cb 100644
--- a/include/hw/net/ftgmac100.h
+++ b/include/hw/net/ftgmac100.h
@@ -42,10 +42,6 @@ struct FTGMAC100State {
  uint32_t isr;
  uint32_t ier;
  uint32_t rx_enabled;
-uint32_t rx_ring;
-uint32_t rx_descriptor;
-uint32_t tx_ring;
-uint32_t tx_descriptor;
  uint32_t math[2];
  uint32_t rbsr;
  uint32_t itc;
@@ -58,7 +54,10 @@ struct FTGMAC100State {
  uint32_t phycr;
  uint32_t phydata;
  uint32_t fcr;
-

[PATCH] hw/cxl/cxl-host: Fix guest crash when getting cxl-fmw property

2024-07-04 Thread Zhao Liu

From: Zhao Liu 

Guest crashes (Segmentation fault) when getting cxl-fmw property via
qmp:

(QEMU) qom-get path=machine property=cxl-fmw

This issue is caused by accessing wrong callback (opaque) type in
machine_get_cfmw().

cxl_machine_init() sets the callback as `CXLState *` type but
machine_get_cfmw() treats the callback as
`CXLFixedMemoryWindowOptionsList **`.

Fix this error by casting opaque to `CXLState *` type in
machine_get_cfmw().

Fixes: 03b39fcf64bc ("hw/cxl: Make the CXL fixed memory window setup a machine 
parameter.")
Signed-off-by: Zhao Liu 
---
 hw/cxl/cxl-host.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index c5f5fcfd64d0..e9f2543c43c6 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -315,7 +315,8 @@ static void machine_set_cxl(Object *obj, Visitor *v, const 
char *name,
 static void machine_get_cfmw(Object *obj, Visitor *v, const char *name,
  void *opaque, Error **errp)
 {
-CXLFixedMemoryWindowOptionsList **list = opaque;
+CXLState *state = opaque;
+CXLFixedMemoryWindowOptionsList **list = &state->cfmw_list;
 
 visit_type_CXLFixedMemoryWindowOptionsList(v, name, list, errp);
 }
-- 
2.34.1

Re: [PATCH v3 4/8] hw/net:ftgmac100: update TX and RX packet buffers address to 64 bits

2024-07-04 Thread Cédric Le Goater


On 7/4/24 10:29 AM, Jamin Lin wrote:

ASPEED AST2700 SOC is a 64 bits quad core CPUs (Cortex-a35)
And the base address of dram is "0x4 " which
is 64bits address.

It have "TXDES 2" and "RXDES 2" to save the high part
physical address of packet buffer.
Ex: TX packet buffer address [34:0]
The "TXDES 2" bits [18:16] which corresponds the bits [34:32]
of the 64 bits address of the TX packet buffer address
and "TXDES 3" bits [31:0] which corresponds the bits [31:0]
of the 64 bits address of the TX packet buffer address.

Update TX and RX packet buffers address type to
64 bits for dram 64 bits address DMA support.

Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/net/ftgmac100.c | 21 ++---
  1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 68956aeb94..80f9cd56d5 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -175,6 +175,8 @@
  #define FTGMAC100_TXDES1_TX2FIC  (1 << 30)
  #define FTGMAC100_TXDES1_TXIC(1 << 31)
  
+#define FTGMAC100_TXDES2_TXBUF_BADR_HI(x)   (((x) >> 16) & 0x7)

+
  /*
   * Receive descriptor
   */
@@ -208,13 +210,15 @@
  #define FTGMAC100_RXDES1_UDP_CHKSUM_ERR  (1 << 26)
  #define FTGMAC100_RXDES1_IP_CHKSUM_ERR   (1 << 27)
  
+#define FTGMAC100_RXDES2_RXBUF_BADR_HI(x)   (((x) >> 16) & 0x7)

+
  /*
   * Receive and transmit Buffer Descriptor
   */
  typedef struct {
  uint32_tdes0;
  uint32_tdes1;
-uint32_tdes2;/* not used by HW */
+uint32_tdes2;/* used by HW 64 bits DMA */
  uint32_tdes3;
  } FTGMAC100Desc;
  
@@ -531,6 +535,7 @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint64_t tx_ring,

  int frame_size = 0;
  uint8_t *ptr = s->frame;
  uint64_t addr = tx_descriptor;
+uint64_t buf_addr = 0;
  uint32_t flags = 0;
  
  while (1) {

@@ -569,7 +574,12 @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint64_t 
tx_ring,
  len =  sizeof(s->frame) - frame_size;
  }
  
-if (dma_memory_read(&address_space_memory, bd.des3,

+buf_addr = bd.des3;
+if (s->dma64) {
+buf_addr = deposit64(buf_addr, 32, 32,
+ FTGMAC100_TXDES2_TXBUF_BADR_HI(bd.des2));
+}
+if (dma_memory_read(&address_space_memory, buf_addr,
  ptr, len, MEMTXATTRS_UNSPECIFIED)) {
  qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to read packet @ 
0x%x\n",
__func__, bd.des3);
@@ -1022,7 +1032,7 @@ static ssize_t ftgmac100_receive(NetClientState *nc, 
const uint8_t *buf,
  uint32_t flags = 0;
  uint64_t addr;
  uint32_t crc;
-uint32_t buf_addr;
+uint64_t buf_addr = 0;
  uint8_t *crc_ptr;
  uint32_t buf_len;
  size_t size = len;
@@ -1087,7 +1097,12 @@ static ssize_t ftgmac100_receive(NetClientState *nc, 
const uint8_t *buf,
  if (size < 4) {
  buf_len += size - 4;
  }
+
  buf_addr = bd.des3;
+if (s->dma64) {
+buf_addr = deposit64(buf_addr, 32, 32,
+ FTGMAC100_RXDES2_RXBUF_BADR_HI(bd.des2));
+}
  if (first && proto == ETH_P_VLAN && buf_len >= 18) {
  bd.des1 = lduw_be_p(buf + 14) | FTGMAC100_RXDES1_VLANTAG_AVAIL;

Re: [PATCH] i386/sev: Don't allow automatic fallback to legacy KVM_SEV*_INIT

2024-07-04 Thread Paolo Bonzini

On Thu, Jul 4, 2024 at 10:42 AM Daniel P. Berrangé  wrote:
>
> On Thu, Jul 04, 2024 at 08:51:05AM +0200, Paolo Bonzini wrote:
> > On Thu, Jul 4, 2024 at 2:01 AM Michael Roth  wrote:
> > > Currently if the 'legacy-vm-type' property of the sev-guest object is
> > > left unset, QEMU will attempt to use the newer KVM_SEV_INIT2 kernel
> > > interface in conjunction with the newer KVM_X86_SEV_VM and
> > > KVM_X86_SEV_ES_VM KVM VM types.
> > >
> > > This can lead to measurement changes if, for instance, an SEV guest was
> > > created on a host that originally had an older kernel that didn't
> > > support KVM_SEV_INIT2, but is booted on the same host later on after the
> > > host kernel was upgraded.
> >
> > I think this is the right thing to do for SEV-ES. I agree that it's
> > bad to require a very new kernel (6.10 will be released only a month
> > before QEMU 9.1), on the other hand the KVM_SEV_ES_INIT API is broken
> > in several ways. As long as there is a way to go back to it, and it's
> > not changed by old machine types, not using it for SEV-ES is the
> > better choice for upstream.
>
> Broken how ?   I know there was the regression with the 'debug_swap'
> parameter, but was something that should just be fixed in the kernel,
> rather than breaking userspace. What else is a problem ?

The debug_swap parameter simply could not be enabled in the old API
without breaking measurements. The new API *is the fix* to allow using
it (though QEMU doesn't have the option plumbed in yet). There is no
extensibility.

Enabling debug_swap by default is also a thorny problem; it cannot be
enabled by default because not all CPUs support it, and also we'd have
the same problem that we cannot enable debug_swap on new machine types
without requiring a new kernel. Tying the default to the -cpu model
would work but it is confusing.

But I guess we can add support for debug_swap, disabled by default and
switch to the new API if debug_swap is enabled.

> I don't think its reasonable for QEMU to require a brand new kernel
> for new machine types, given SEV & SEV-ES have been deployed for
> many years already.

I think it's reasonable if the fix is displayed right into the error
message. It's only needed for SEV-ES though, SEV can use the old and
new APIs interchangeably.

Paolo

Re: [PATCH] hw/intc: sifive_plic: Fix heap-buffer-overflow in SiFive PLIC read operation

2024-07-04 Thread Peter Maydell

On Wed, 3 Jul 2024 at 22:32, Zheyu Ma  wrote:
>
> The sifive_plic_read function in hw/intc/sifive_plic.c had a potential
> heap-buffer-overflow issue when reading from the pending_base region.
> This occurred because the code did not check if the calculated word index
> was within valid bounds before accessing the pending array.
>
> This fix prevents out-of-bounds memory access, ensuring safer and more
> robust handling of PLIC reads.
>
> ASAN log:
> ==78800==ERROR: AddressSanitizer: heap-buffer-overflow on address 
> 0x60238a14 at pc 0x5baf49d0d6cb bp 0x7ffc2ea4e180 sp 0x7ffc2ea4e178
> READ of size 4 at 0x60238a14 thread T0
> #0 0x5baf49d0d6ca in sifive_plic_read hw/intc/sifive_plic.c:151:16
> #1 0x5baf49f7f3bb in memory_region_read_accessor system/memory.c:445:11
>
> Reproducer:
> cat << EOF | qemu-system-riscv64  -display \
> none -machine accel=qtest, -m 512M -machine shakti_c -m 2G -qtest stdio
> readl 0xc001004
> EOF
>
> Signed-off-by: Zheyu Ma 
> ---
>  hw/intc/sifive_plic.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
> index e559f11805..d2a90dfd3a 100644
> --- a/hw/intc/sifive_plic.c
> +++ b/hw/intc/sifive_plic.c
> @@ -147,7 +147,14 @@ static uint64_t sifive_plic_read(void *opaque, hwaddr 
> addr, unsigned size)
>  (plic->num_sources + 31) >> 3)) {
>  uint32_t word = (addr - plic->pending_base) >> 2;
>
> -return plic->pending[word];
> +if (word < plic->bitfield_words) {
> +return plic->pending[word];
> +} else {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "sifive_plic_read: Word out of bounds for 
> pending_base read: word=%u\n",
> +  word);
> +return 0;
> +}

This seems a bit odd. This part of the code is guarded by

} else if (addr_between(addr, plic->pending_base,
(plic->num_sources + 31) >> 3)) {

and we calculate plic->bitfield_words in realize based on
plic->num_sources:
s->bitfield_words = (s->num_sources + 31) >> 5;

so presumably the intention was that we put enough words
in the bitfield for the number of sources we have, so that
the array access wouldn't overrun. Maybe we got the
calculation wrong?

thanks
-- PMM

Re: [PATCH 1/4] accel/kvm/kvm-all: Fix superfluous trailing semicolon

2024-07-04 Thread Peter Maydell

On Thu, 4 Jul 2024 at 09:32, Zhao Liu  wrote:
>
> Signed-off-by: Zhao Liu 
> ---
>  accel/kvm/kvm-all.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 2b4ab896794b..64bf47a03300 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -3878,7 +3878,7 @@ static StatsList *add_kvmstat_entry(struct 
> kvm_stats_desc *pdesc,
>  /* Alloc and populate data list */
>  stats = g_new0(Stats, 1);
>  stats->name = g_strdup(pdesc->name);
> -stats->value = g_new0(StatsValue, 1);;
> +stats->value = g_new0(StatsValue, 1);
>
>  if ((pdesc->flags & KVM_STATS_UNIT_MASK) == KVM_STATS_UNIT_BOOLEAN) {
>  stats->value->u.boolean = *stats_data;

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 3/4] util/oslib-posix: Fix superfluous trailing semicolon

2024-07-04 Thread Peter Maydell

On Thu, 4 Jul 2024 at 09:33, Zhao Liu  wrote:
>
> Signed-off-by: Zhao Liu 
> ---
>  util/oslib-posix.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index e76441695bdc..b090fe0eed0d 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -263,7 +263,7 @@ int qemu_socketpair(int domain, int type, int protocol, 
> int sv[2])
>  return ret;
>  }
>  #endif
> -ret = socketpair(domain, type, protocol, sv);;
> +ret = socketpair(domain, type, protocol, sv);
>  if (ret == 0) {
>  qemu_set_cloexec(sv[0]);
>  qemu_set_cloexec(sv[1]);
> --
> 2.34.1
>

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 4/4] target/hexagon/imported/mmvec: Fix superfluous trailing semicolon

2024-07-04 Thread Peter Maydell

On Thu, 4 Jul 2024 at 09:33, Zhao Liu  wrote:
>
> Fix the superfluous trailing semicolon in target/hexagon/imported/mmvec/
> ext.idef.
>
> Cc: Brian Cain 
> Signed-off-by: Zhao Liu 
> ---
>  target/hexagon/imported/mmvec/ext.idef | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/hexagon/imported/mmvec/ext.idef 
> b/target/hexagon/imported/mmvec/ext.idef
> index 98daabfb07c4..03d31f6181d7 100644
> --- a/target/hexagon/imported/mmvec/ext.idef
> +++ b/target/hexagon/imported/mmvec/ext.idef
> @@ -2855,7 +2855,7 @@ EXTINSN(V6_vscattermhw_add,  
> "vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32", ATTRIBS(A_EXT
>  fVALIGN(RtV, element_size);
>  fVFOREACH(32, i) {
>  for(j = 0; j < 2; j++) {
> - EA =  RtV + fVALIGN(VvvV.v[j].uw[i],ALIGNMENT);;
> + EA =  RtV + fVALIGN(VvvV.v[j].uw[i],ALIGNMENT);
>   
> fVLOG_VTCM_HALFWORD_INCREMENT_DV(EA,VvvV.v[j].uw[i],VwV,(2*i+j),i,j,ALIGNMENT,MuV);
>  }
>  }
> --
> 2.34.1

As a change this is obviously fine, but given the "imported"
in the pathname I don't know if this is something that should
be fixed in whatever upstream source we got this from instead
or as well. Brian ?

thanks
-- PMM

Re: [PATCH 2/4] hw/i386/x86: Fix superfluous trailing semicolon

2024-07-04 Thread Peter Maydell

On Thu, 4 Jul 2024 at 09:33, Zhao Liu  wrote:
>
> Signed-off-by: Zhao Liu 
> ---
>  hw/i386/x86.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index a4aa8e081098..01fc5e656272 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -242,7 +242,7 @@ static void x86_machine_get_pit(Object *obj, Visitor *v, 
> const char *name,
>  static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name,
>  void *opaque, Error **errp)
>  {
> -X86MachineState *x86ms = X86_MACHINE(obj);;
> +X86MachineState *x86ms = X86_MACHINE(obj);
>
>  visit_type_OnOffAuto(v, name, &x86ms->pit, errp);
>  }
> --
> 2.34.1

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH] i386/sev: Don't allow automatic fallback to legacy KVM_SEV*_INIT

2024-07-04 Thread Daniel P . Berrangé

On Thu, Jul 04, 2024 at 11:31:16AM +0200, Paolo Bonzini wrote:
> On Thu, Jul 4, 2024 at 10:42 AM Daniel P. Berrangé  
> wrote:
> >
> > On Thu, Jul 04, 2024 at 08:51:05AM +0200, Paolo Bonzini wrote:
> > > On Thu, Jul 4, 2024 at 2:01 AM Michael Roth  wrote:
> > > > Currently if the 'legacy-vm-type' property of the sev-guest object is
> > > > left unset, QEMU will attempt to use the newer KVM_SEV_INIT2 kernel
> > > > interface in conjunction with the newer KVM_X86_SEV_VM and
> > > > KVM_X86_SEV_ES_VM KVM VM types.
> > > >
> > > > This can lead to measurement changes if, for instance, an SEV guest was
> > > > created on a host that originally had an older kernel that didn't
> > > > support KVM_SEV_INIT2, but is booted on the same host later on after the
> > > > host kernel was upgraded.
> > >
> > > I think this is the right thing to do for SEV-ES. I agree that it's
> > > bad to require a very new kernel (6.10 will be released only a month
> > > before QEMU 9.1), on the other hand the KVM_SEV_ES_INIT API is broken
> > > in several ways. As long as there is a way to go back to it, and it's
> > > not changed by old machine types, not using it for SEV-ES is the
> > > better choice for upstream.
> >
> > Broken how ?   I know there was the regression with the 'debug_swap'
> > parameter, but was something that should just be fixed in the kernel,
> > rather than breaking userspace. What else is a problem ?
> 
> The debug_swap parameter simply could not be enabled in the old API
> without breaking measurements. The new API *is the fix* to allow using
> it (though QEMU doesn't have the option plumbed in yet). There is no
> extensibility.
> 
> Enabling debug_swap by default is also a thorny problem; it cannot be
> enabled by default because not all CPUs support it, and also we'd have
> the same problem that we cannot enable debug_swap on new machine types
> without requiring a new kernel. Tying the default to the -cpu model
> would work but it is confusing.

Presumably we can tie it to '-cpu host' without much problem, and
then just leave it as an opt-in feature flag for named CPU models.

> But I guess we can add support for debug_swap, disabled by default and
> switch to the new API if debug_swap is enabled.
> 
> > I don't think its reasonable for QEMU to require a brand new kernel
> > for new machine types, given SEV & SEV-ES have been deployed for
> > many years already.
> 
> I think it's reasonable if the fix is displayed right into the error
> message. It's only needed for SEV-ES though, SEV can use the old and
> new APIs interchangeably.

FYI currently it is proposed to unconditionally force set legacy-vm-type=true
in libvirt, so QEMU guests would *never* use the new ioctl, to fix what we
consider to be a QEMU / kernel guest ABI regression.

If libvirt adds this though, it is basically a forever setting we would
never be able to remove as removing would break ABI compat again.

  
https://lists.libvirt.org/archives/list/de...@lists.libvirt.org/message/KP24LPFV4OCMN45TDHNXQVBPDZ56QSRR/

I'd much rather QEMU did NOT set this by default in its machine types,
so libvirt doesn't have to add this forced flag. That way downstream
distros who /can/ guarantee new enough kernels, can still use
legacy-vm-type=false in their downstream machine types, without
libvirt overriding this.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 1/4] accel/kvm/kvm-all: Fix superfluous trailing semicolon

2024-07-04 Thread Alex Bennée

Zhao Liu  writes:

> Signed-off-by: Zhao Liu 
> ---
>  accel/kvm/kvm-all.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 2b4ab896794b..64bf47a03300 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -3878,7 +3878,7 @@ static StatsList *add_kvmstat_entry(struct 
> kvm_stats_desc *pdesc,
>  /* Alloc and populate data list */
>  stats = g_new0(Stats, 1);
>  stats->name = g_strdup(pdesc->name);
> -stats->value = g_new0(StatsValue, 1);;
> +stats->value = g_new0(StatsValue, 1);
>  
>  if ((pdesc->flags & KVM_STATS_UNIT_MASK) == KVM_STATS_UNIT_BOOLEAN) {
>  stats->value->u.boolean = *stats_data;

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH 4/4] target/hexagon/imported/mmvec: Fix superfluous trailing semicolon

2024-07-04 Thread Alex Bennée

Zhao Liu  writes:

> Fix the superfluous trailing semicolon in target/hexagon/imported/mmvec/
> ext.idef.
>
> Cc: Brian Cain 
> Signed-off-by: Zhao Liu 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH 3/4] util/oslib-posix: Fix superfluous trailing semicolon

2024-07-04 Thread Alex Bennée

Zhao Liu  writes:

> Signed-off-by: Zhao Liu 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH] MAINTAINERS: add Stefano Garzarella as vhost/vhost-user reviewer

2024-07-04 Thread Alex Bennée

Stefano Garzarella  writes:

> I have recently been working on supporting vhost-user on any POSIX,
> so I want to help maintain it.
>
> Cc: Michael S. Tsirkin 
> Signed-off-by: Stefano Garzarella 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH] i386/sev: Don't allow automatic fallback to legacy KVM_SEV*_INIT

2024-07-04 Thread Paolo Bonzini

On Thu, Jul 4, 2024 at 11:39 AM Daniel P. Berrangé  wrote:
> > The debug_swap parameter simply could not be enabled in the old API
> > without breaking measurements. The new API *is the fix* to allow using
> > it (though QEMU doesn't have the option plumbed in yet). There is no
> > extensibility.
> >
> > Enabling debug_swap by default is also a thorny problem; it cannot be
> > enabled by default because not all CPUs support it, and also we'd have
> > the same problem that we cannot enable debug_swap on new machine types
> > without requiring a new kernel. Tying the default to the -cpu model
> > would work but it is confusing.
>
> Presumably we can tie it to '-cpu host' without much problem, and
> then just leave it as an opt-in feature flag for named CPU models.

'-cpu host' for SEV-SNP is also problematic, since CPUID is part of
the measurement. It's okay for starting guests in a quick and dirty
manner, but it cannot be used if measurement is in use.

It's weird to have "-cpu" provide the default for "-object", since
-object is created much earlier than CPUs. But then "-cpu" itself is
weird because it's a kind of "factory" for future objects. Maybe we
should redo the same exercise I did to streamline machine
initialization, this time focusing on -cpu/-machine/-accel, to
understand the various phases and where sev-{,snp-}guest
initialization fits.

> > I think it's reasonable if the fix is displayed right into the error
> > message. It's only needed for SEV-ES though, SEV can use the old and
> > new APIs interchangeably.
>
> FYI currently it is proposed to unconditionally force set legacy-vm-type=true
> in libvirt, so QEMU guests would *never* use the new ioctl, to fix what we
> consider to be a QEMU / kernel guest ABI regression.

Ok, so it's probably best to apply both this and your patch for now.
Later debug-swap can be enabled and will automatically disable
legacy-vm-type if the user left it to the default.

If you want to test this combo and send a pull request (either to me
or to Richard), that would help because I'm mostly away for a few
days.

Paolo

Re: [PULL v3 58/85] hw/i386/fw_cfg: Add etc/e820 to fw_cfg late

2024-07-04 Thread Alex Bennée

David Woodhouse  writes:

> On Wed, 2024-07-03 at 18:48 -0400, Michael S. Tsirkin wrote:
>> From: David Woodhouse 
>
> Oops, that was supposed to be
>
> From: David Woodhouse 
>
> Not the end of the world if it's too late to change it.

If attribution matters we do have .mailmap and the gitdm metadata for
after the fact fixups.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

[PULL 01/16] meson: move shared_module() calls where modules are already walked

2024-07-04 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 meson.build | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/meson.build b/meson.build
index 54e6b09f4fb..8909f8c87d9 100644
--- a/meson.build
+++ b/meson.build
@@ -3602,6 +3602,7 @@ modinfo_files = []
 
 block_mods = []
 system_mods = []
+emulator_modules = []
 foreach d, list : modules
   if not (d == 'block' ? have_block : have_system)
 continue
@@ -3609,14 +3610,20 @@ foreach d, list : modules
 
   foreach m, module_ss : list
 if enable_modules
+  module_ss.add(modulecommon)
   module_ss = module_ss.apply(config_all_devices, strict: false)
   sl = static_library(d + '-' + m, [genh, module_ss.sources()],
-  dependencies: [modulecommon, 
module_ss.dependencies()], pic: true)
+  dependencies: module_ss.dependencies(), pic: true)
   if d == 'block'
 block_mods += sl
   else
 system_mods += sl
   endif
+  emulator_modules += shared_module(sl.name(),
+name_prefix: '',
+link_whole: sl,
+install: true,
+install_dir: qemu_moddir)
   if module_ss.sources() != []
 # FIXME: Should use sl.extract_all_objects(recursive: true) as
 # input. Sources can be used multiple times but objects are
@@ -3642,6 +3649,7 @@ endforeach
 foreach d, list : target_modules
   foreach m, module_ss : list
 if enable_modules
+  module_ss.add(modulecommon)
   foreach target : target_dirs
 if target.endswith('-softmmu')
   config_target = config_target_mak[target]
@@ -3654,11 +3662,16 @@ foreach d, list : target_modules
 module_name = d + '-' + m + '-' + config_target['TARGET_NAME']
 sl = static_library(module_name,
 [genh, target_module_ss.sources()],
-dependencies: [modulecommon, 
target_module_ss.dependencies()],
+dependencies: target_module_ss.dependencies(),
 include_directories: target_inc,
 c_args: c_args,
 pic: true)
 system_mods += sl
+emulator_modules += shared_module(sl.name(),
+name_prefix: '',
+link_whole: sl,
+install: true,
+install_dir: qemu_moddir)
 # FIXME: Should use sl.extract_all_objects(recursive: true) too.
 modinfo_files += custom_target(module_name + '.modinfo',
output: module_name + '.modinfo',
@@ -3692,6 +3705,10 @@ if enable_modules
   hw_arch[arch].add(modinfo_dep)
 endif
   endforeach
+
+  if emulator_modules.length() > 0
+alias_target('modules', emulator_modules)
+  endif
 endif
 
 nm = find_program('nm')
@@ -3785,19 +3802,6 @@ common_ss.add(hwcore)
 # Targets #
 ###
 
-emulator_modules = []
-foreach m : block_mods + system_mods
-  emulator_modules += shared_module(m.name(),
-build_by_default: true,
-name_prefix: '',
-link_whole: m,
-install: true,
-install_dir: qemu_moddir)
-endforeach
-if emulator_modules.length() > 0
-  alias_target('modules', emulator_modules)
-endif
-
 system_ss.add(authz, blockdev, chardev, crypto, io, qmp)
 common_ss.add(qom, qemuutil)
 
-- 
2.45.2

[PULL 06/16] meson: Drop the .fa library suffix

2024-07-04 Thread Paolo Bonzini

The non-standard .fa library suffix breaks the link source
de-duplication done by Meson so drop it.

The lack of link source de-duplication causes AddressSanitizer to
complain ODR violations, and makes GNU ld abort when combined with
clang's LTO.

Fortunately, the non-standard suffix is not necessary anymore for
two reasons.

First, the non-standard suffix was necessary for fork-fuzzing.
Meson wraps all standard-suffixed libraries with --start-group and
--end-group. This made a fork-fuzz.ld linker script wrapped as well and
broke builds. Commit d2e6f9272d33 ("fuzz: remove fork-fuzzing
scaffolding") dropped fork-fuzzing so we can now restore the standard
suffix.

Second, the libraries are not even built anymore, because it is
possible to just use the object files directly via extract_all_objects().

The occurences of the suffix were detected and removed by performing
a tree-wide search with 'fa' and .fa (note the quotes and dot).

Signed-off-by: Akihiko Odaki 
Message-ID: <20240524-xkb-v4-4-2de564e5c...@daynix.com>
Signed-off-by: Paolo Bonzini 
---
 docs/devel/build-system.rst |  5 -
 meson.build | 17 ++---
 stubs/blk-exp-close-all.c   |  2 +-
 .gitlab-ci.d/buildtest-template.yml |  2 --
 .gitlab-ci.d/buildtest.yml  |  2 --
 gdbstub/meson.build |  2 --
 tcg/meson.build |  2 --
 tests/Makefile.include  |  2 +-
 tests/qtest/libqos/meson.build  |  1 -
 9 files changed, 4 insertions(+), 31 deletions(-)

diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
index 39a1934c63f..79eceb179de 100644
--- a/docs/devel/build-system.rst
+++ b/docs/devel/build-system.rst
@@ -235,16 +235,11 @@ Subsystem sourcesets:
   are then turned into static libraries as follows::
 
 libchardev = static_library('chardev', chardev_ss.sources(),
-name_suffix: 'fa',
 build_by_default: false)
 
 chardev = declare_dependency(objects: 
libchardev.extract_all_objects(recursive: false),
  dependencies: chardev_ss.dependencies())
 
-  As of Meson 0.55.1, the special ``.fa`` suffix should be used for everything
-  that is used with ``link_whole``, to ensure that the link flags are placed
-  correctly in the command line.
-
 Target-independent emulator sourcesets:
   Various general purpose helper code is compiled only once and
   the .o files are linked into all output binaries that need it.
diff --git a/meson.build b/meson.build
index 429899d8603..3a1ad4ddeb4 100644
--- a/meson.build
+++ b/meson.build
@@ -3467,7 +3467,6 @@ endif
 qom_ss = qom_ss.apply({})
 libqom = static_library('qom', qom_ss.sources() + genh,
 dependencies: [qom_ss.dependencies()],
-name_suffix: 'fa',
 build_by_default: false)
 qom = declare_dependency(objects: libqom.extract_all_objects(recursive: false),
  dependencies: qom_ss.dependencies())
@@ -3475,7 +3474,6 @@ qom = declare_dependency(objects: 
libqom.extract_all_objects(recursive: false),
 event_loop_base = files('event-loop-base.c')
 event_loop_base = static_library('event-loop-base',
  sources: event_loop_base + genh,
- name_suffix: 'fa',
  build_by_default: false)
 event_loop_base = declare_dependency(objects: 
event_loop_base.extract_all_objects(recursive: false),
  dependencies: [qom])
@@ -3728,7 +3726,6 @@ qemu_syms = custom_target('qemu.syms', output: 
'qemu.syms',
 authz_ss = authz_ss.apply({})
 libauthz = static_library('authz', authz_ss.sources() + genh,
   dependencies: [authz_ss.dependencies()],
-  name_suffix: 'fa',
   build_by_default: false)
 
 authz = declare_dependency(objects: libauthz.extract_all_objects(recursive: 
false),
@@ -3737,7 +3734,6 @@ authz = declare_dependency(objects: 
libauthz.extract_all_objects(recursive: fals
 crypto_ss = crypto_ss.apply({})
 libcrypto = static_library('crypto', crypto_ss.sources() + genh,
dependencies: [crypto_ss.dependencies()],
-   name_suffix: 'fa',
build_by_default: false)
 
 crypto = declare_dependency(objects: libcrypto.extract_all_objects(recursive: 
false),
@@ -3747,14 +3743,12 @@ io_ss = io_ss.apply({})
 libio = static_library('io', io_ss.sources() + genh,
dependencies: [io_ss.dependencies()],
link_with: libqemuutil,
-   name_suffix: 'fa',
build_by_default: false)
 
 io = declare_dependency(objects: libio.extract_all_objects(recursive: false),
 dependencies: [io_ss.dependencies(), crypto, qom])
 
 libmigration = static_library('migration'

[PULL 03/16] meson: merge plugin_ldflags into emulator_link_args

2024-07-04 Thread Paolo Bonzini

These serve the same purpose, except plugin_ldflags ends up in the linker
command line in a more roundabout way (through specific_ss).  Simplify.

Signed-off-by: Paolo Bonzini 
---
 plugins/meson.build | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/plugins/meson.build b/plugins/meson.build
index 51b4350c2a0..18a0303bff9 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -1,4 +1,3 @@
-plugin_ldflags = []
 # Modules need more symbols than just those in plugins/qemu-plugins.symbols
 if not enable_modules
   if host_os == 'darwin'
@@ -7,9 +6,9 @@ if not enable_modules
   output: 'qemu-plugins-ld64.symbols',
   capture: true,
   command: ['sed', '-ne', 's/^[[:space:]]*\\(qemu_.*\\);/_\\1/p', 
'@INPUT@'])
-plugin_ldflags = 
['-Wl,-exported_symbols_list,plugins/qemu-plugins-ld64.symbols']
+emulator_link_args += 
['-Wl,-exported_symbols_list,plugins/qemu-plugins-ld64.symbols']
   else
-plugin_ldflags = ['-Xlinker', '--dynamic-list=' + 
(meson.project_source_root() / 'plugins/qemu-plugins.symbols')]
+emulator_link_args += ['-Xlinker', '--dynamic-list=' + 
(meson.project_source_root() / 'plugins/qemu-plugins.symbols')]
   endif
 endif
 
@@ -37,5 +36,5 @@ if get_option('plugins')
 'loader.c',
 'core.c',
 'api.c',
-  ), declare_dependency(link_args: plugin_ldflags))
+  ))
 endif
-- 
2.45.2

[PULL 11/16] i386/sev: Fix error message in sev_get_capabilities()

2024-07-04 Thread Paolo Bonzini

From: Michal Privoznik 

When a custom path is provided to sev-guest object and opening
the path fails an error message is reported. But the error
message still mentions DEFAULT_SEV_DEVICE ("/dev/sev") instead of
the custom path.

Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0
Signed-off-by: Michal Privoznik 
Reviewed-by: Philippe Mathieu-Daudé 
Link: 
https://lore.kernel.org/r/b4648905d399780063dc70851d3d6a3cd28719a5.1719218926.git.mpriv...@redhat.com
Signed-off-by: Paolo Bonzini 
---
 target/i386/sev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 2a0f94d390d..054366878aa 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -597,7 +597,7 @@ static SevCapability *sev_get_capabilities(Error **errp)
 fd = open(sev_device, O_RDWR);
 if (fd < 0) {
 error_setg_errno(errp, errno, "SEV: Failed to open %s",
- DEFAULT_SEV_DEVICE);
+ sev_device);
 g_free(sev_device);
 return NULL;
 }
-- 
2.45.2

[PULL 12/16] i386/sev: Fallback to the default SEV device if none provided in sev_get_capabilities()

2024-07-04 Thread Paolo Bonzini

From: Michal Privoznik 

When management tools (e.g. libvirt) query QEMU capabilities,
they start QEMU with a minimalistic configuration and issue
various commands on monitor. One of the command issued is/might
be "query-sev-capabilities" to learn values like cbitpos or
reduced-phys-bits. But as of v9.0.0-1145-g16dcf200dc the monitor
command returns an error instead.

This creates a chicken-egg problem because in order to query
those aforementioned values QEMU needs to be started with a
'sev-guest' object. But to start QEMU with the values must be
known.

I think it's safe to assume that the default path ("/dev/sev")
provides the same data as user provided one. So fall back to it.

Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0
Signed-off-by: Michal Privoznik 
Link: 
https://lore.kernel.org/r/157f93712c23818be193ce785f648f0060b33dee.1719218926.git.mpriv...@redhat.com
Signed-off-by: Paolo Bonzini 
---
 target/i386/sev.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 054366878aa..2f3dbe289f4 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -587,13 +587,13 @@ static SevCapability *sev_get_capabilities(Error **errp)
 }
 
 sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
-if (!sev_common) {
-error_setg(errp, "SEV is not configured");
-return NULL;
+if (sev_common) {
+sev_device = object_property_get_str(OBJECT(sev_common), "sev-device",
+ &error_abort);
+} else {
+sev_device = g_strdup(DEFAULT_SEV_DEVICE);
 }
 
-sev_device = object_property_get_str(OBJECT(sev_common), "sev-device",
- &error_abort);
 fd = open(sev_device, O_RDWR);
 if (fd < 0) {
 error_setg_errno(errp, errno, "SEV: Failed to open %s",
-- 
2.45.2

[PULL 05/16] Revert "meson: Propagate gnutls dependency"

2024-07-04 Thread Paolo Bonzini

From: Akihiko Odaki 

This reverts commit 3eacf70bb5a83e4775ad8003cbca63a40f70c8c2.

It was only needed because of duplicate objects caused by
declare_dependency(link_whole: ...), and can be dropped now
that meson.build specifies objects and dependencies separately
for the internal dependencies.

Signed-off-by: Akihiko Odaki 
Message-ID: <20240524-objects-v1-2-07cbbe961...@daynix.com>
Signed-off-by: Paolo Bonzini 
---
 meson.build| 4 ++--
 block/meson.build  | 2 +-
 io/meson.build | 2 +-
 storage-daemon/meson.build | 2 +-
 ui/meson.build | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/meson.build b/meson.build
index 0c314ae5701..429899d8603 100644
--- a/meson.build
+++ b/meson.build
@@ -3526,7 +3526,7 @@ if have_block
 'blockdev-nbd.c',
 'iothread.c',
 'job-qmp.c',
-  ), gnutls)
+  ))
 
   # os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
   # os-win32.c does not
@@ -4044,7 +4044,7 @@ if have_tools
  dependencies: [block, qemuutil], install: true)
   qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
link_args: '@block.syms', link_depends: block_syms,
-   dependencies: [blockdev, qemuutil, gnutls, selinux],
+   dependencies: [blockdev, qemuutil, selinux],
install: true)
 
   subdir('storage-daemon')
diff --git a/block/meson.build b/block/meson.build
index 158dc3b89db..f1262ec2ba8 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -39,7 +39,7 @@ block_ss.add(files(
   'throttle.c',
   'throttle-groups.c',
   'write-threshold.c',
-), zstd, zlib, gnutls)
+), zstd, zlib)
 
 system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
 system_ss.add(files('block-ram-registrar.c'))
diff --git a/io/meson.build b/io/meson.build
index 283b9b2bdbd..1164812f912 100644
--- a/io/meson.build
+++ b/io/meson.build
@@ -13,4 +13,4 @@ io_ss.add(files(
   'dns-resolver.c',
   'net-listener.c',
   'task.c',
-), gnutls)
+))
diff --git a/storage-daemon/meson.build b/storage-daemon/meson.build
index fd5e32f4b28..5e61a9d1bdf 100644
--- a/storage-daemon/meson.build
+++ b/storage-daemon/meson.build
@@ -1,6 +1,6 @@
 qsd_ss = ss.source_set()
 qsd_ss.add(files('qemu-storage-daemon.c'))
-qsd_ss.add(blockdev, chardev, qmp, qom, qemuutil, gnutls)
+qsd_ss.add(blockdev, chardev, qmp, qom, qemuutil)
 
 subdir('qapi')
 
diff --git a/ui/meson.build b/ui/meson.build
index cfbf29428df..28c7381dd10 100644
--- a/ui/meson.build
+++ b/ui/meson.build
@@ -44,7 +44,7 @@ vnc_ss.add(files(
   'vnc-jobs.c',
   'vnc-clipboard.c',
 ))
-vnc_ss.add(zlib, jpeg, gnutls)
+vnc_ss.add(zlib, jpeg)
 vnc_ss.add(when: sasl, if_true: files('vnc-auth-sasl.c'))
 system_ss.add_all(when: [vnc, pixman], if_true: vnc_ss)
 system_ss.add(when: vnc, if_false: files('vnc-stubs.c'))
-- 
2.45.2

[PULL 02/16] meson: move block.syms dependency out of libblock

2024-07-04 Thread Paolo Bonzini

In order to define libqemuutil symbols that are requested by block modules,
QEMU currently uses a combination of the "link_depends" argument of
libraries (which is propagated into dependencies, but not available in
dependencies) and the "link_args" argument of declare_dependency()
(which _is_ available in static_library, but probably not used for
historical reasons only).

Unfortunately the link_depends will not be propagated into the
"block" dependency if it is defined using
declare_dependency(objects: ...); and it is not possible to
add it directly to the dependency because the keyword argument
simply is not available.

The only solution, in order to switch to defining the dependency
without using "link_whole" (which has problems of its own, see
https://github.com/mesonbuild/meson/pull/8151#issuecomment-754796420),
is unfortunately to add the link_args and link_depends to the
executables directly; fortunately there is just four of them.

It is possible (and I will look into it) to add "link_depends"
to declare_dependency(), but it probably will be a while before
QEMU can use it.

Signed-off-by: Paolo Bonzini 
---
 meson.build| 5 +++--
 storage-daemon/meson.build | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/meson.build b/meson.build
index 8909f8c87d9..df9a64302f0 100644
--- a/meson.build
+++ b/meson.build
@@ -3759,12 +3759,10 @@ system_ss.add(migration)
 block_ss = block_ss.apply({})
 libblock = static_library('block', block_ss.sources() + genh,
   dependencies: block_ss.dependencies(),
-  link_depends: block_syms,
   name_suffix: 'fa',
   build_by_default: false)
 
 block = declare_dependency(link_whole: [libblock],
-   link_args: '@block.syms',
dependencies: [crypto, io])
 
 blockdev_ss = blockdev_ss.apply({})
@@ -4033,10 +4031,13 @@ endif
 
 if have_tools
   qemu_img = executable('qemu-img', [files('qemu-img.c'), hxdep],
+ link_args: '@block.syms', link_depends: block_syms,
  dependencies: [authz, block, crypto, io, qom, qemuutil], install: 
true)
   qemu_io = executable('qemu-io', files('qemu-io.c'),
+ link_args: '@block.syms', link_depends: block_syms,
  dependencies: [block, qemuutil], install: true)
   qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
+   link_args: '@block.syms', link_depends: block_syms,
dependencies: [blockdev, qemuutil, gnutls, selinux],
install: true)
 
diff --git a/storage-daemon/meson.build b/storage-daemon/meson.build
index 46267b63e72..fd5e32f4b28 100644
--- a/storage-daemon/meson.build
+++ b/storage-daemon/meson.build
@@ -8,6 +8,7 @@ if have_tools
   qsd_ss = qsd_ss.apply({})
   qsd = executable('qemu-storage-daemon',
qsd_ss.sources(),
+   link_args: '@block.syms', link_depends: block_syms,
dependencies: qsd_ss.dependencies(),
install: true)
 endif
-- 
2.45.2

[PULL 04/16] meson: Pass objects and dependencies to declare_dependency()

2024-07-04 Thread Paolo Bonzini

From: Akihiko Odaki 

We used to request declare_dependency() to link_whole static libraries.
If a static library is a thin archive, GNU ld keeps all object files
referenced by the archive open, and sometimes exceeds the open file limit.

Another problem with link_whole is that suboptimal handling of nested
dependencies.

link_whole by itself does not propagate dependencies. In particular,
gnutls, a dependency of crypto, is not propagated to its users, and we
currently workaround the issue by declaring gnutls as a dependency for
each crypto user.  On the other hand, if you write something like

  libfoo = static_library('foo', 'foo.c', dependencies: gnutls)
  foo = declare_dependency(link_whole: libfoo)

  libbar = static_library('bar', 'bar.c', dependencies: foo)
  bar = declare_dependency(link_whole: libbar, dependencies: foo)
  executable('prog', sources: files('prog.c'), dependencies: [foo, bar])

hoping to propagate the gnutls dependency into bar.c, you'll see a
linking failure for "prog", because the foo.c.o object file is included in
libbar.a and therefore it is linked twice into "prog": once from libfoo.a
and once from libbar.a.  Here Meson does not see the duplication, it
just asks the linker to link all of libfoo.a and libbar.a into "prog".

Instead of using link_whole, extract objects included in static libraries
and pass them to declare_dependency(); and then the dependencies can be
added as well so that they are propagated, because object files on the
linker command line are always deduplicated.

This requires Meson 1.1.0 or later.

Signed-off-by: Akihiko Odaki 
Message-ID: <20240524-objects-v1-1-07cbbe961...@daynix.com>
Signed-off-by: Paolo Bonzini 
---
 docs/devel/build-system.rst|  3 ++-
 meson.build| 44 +++---
 gdbstub/meson.build|  4 ++--
 pythondeps.toml|  2 +-
 tcg/meson.build|  6 +++--
 tests/qtest/libqos/meson.build |  2 +-
 6 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
index f4fd76117df..39a1934c63f 100644
--- a/docs/devel/build-system.rst
+++ b/docs/devel/build-system.rst
@@ -238,7 +238,8 @@ Subsystem sourcesets:
 name_suffix: 'fa',
 build_by_default: false)
 
-chardev = declare_dependency(link_whole: libchardev)
+chardev = declare_dependency(objects: 
libchardev.extract_all_objects(recursive: false),
+ dependencies: chardev_ss.dependencies())
 
   As of Meson 0.55.1, the special ``.fa`` suffix should be used for everything
   that is used with ``link_whole``, to ensure that the link flags are placed
diff --git a/meson.build b/meson.build
index df9a64302f0..0c314ae5701 100644
--- a/meson.build
+++ b/meson.build
@@ -1,4 +1,4 @@
-project('qemu', ['c'], meson_version: '>=0.63.0',
+project('qemu', ['c'], meson_version: '>=1.1.0',
 default_options: ['warning_level=1', 'c_std=gnu11', 'cpp_std=gnu++11', 
'b_colorout=auto',
   'b_staticpic=false', 'stdsplit=false', 
'optimization=2', 'b_pie=true'],
 version: files('VERSION'))
@@ -3461,7 +3461,7 @@ endif
 
 if enable_modules
   libmodulecommon = static_library('module-common', files('module-common.c') + 
genh, pic: true, c_args: '-DBUILD_DSO')
-  modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: 
'-DBUILD_DSO')
+  modulecommon = declare_dependency(objects: 
libmodulecommon.extract_all_objects(recursive: false), compile_args: 
'-DBUILD_DSO')
 endif
 
 qom_ss = qom_ss.apply({})
@@ -3469,14 +3469,15 @@ libqom = static_library('qom', qom_ss.sources() + genh,
 dependencies: [qom_ss.dependencies()],
 name_suffix: 'fa',
 build_by_default: false)
-qom = declare_dependency(link_whole: libqom)
+qom = declare_dependency(objects: libqom.extract_all_objects(recursive: false),
+ dependencies: qom_ss.dependencies())
 
 event_loop_base = files('event-loop-base.c')
 event_loop_base = static_library('event-loop-base',
  sources: event_loop_base + genh,
  name_suffix: 'fa',
  build_by_default: false)
-event_loop_base = declare_dependency(link_whole: event_loop_base,
+event_loop_base = declare_dependency(objects: 
event_loop_base.extract_all_objects(recursive: false),
  dependencies: [qom])
 
 stub_ss = stub_ss.apply({})
@@ -3621,7 +3622,8 @@ foreach d, list : modules
   endif
   emulator_modules += shared_module(sl.name(),
 name_prefix: '',
-link_whole: sl,
+objects: sl.extract_all_objects(recursive: false),
+dependencies: module_ss.dependencies(),
 install: true,
 install_

[PULL 16/16] target/i386/SEV: implement mask_cpuid_features

2024-07-04 Thread Paolo Bonzini

Drop features that are listed as "BitMask" in the PPR and currently
not supported by AMD processors.  The only ones that may become useful
in the future are TSC deadline timer and x2APIC, everything else is
not needed for SEV-SNP guests (e.g. VIRT_SSBD) or would require
processor support (e.g. TSC_ADJUST).

This allows running SEV-SNP guests with "-cpu host".

Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.h |  4 
 target/i386/sev.c | 33 +
 2 files changed, 37 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 0d5624355e4..c43ac01c794 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -812,6 +812,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, 
FeatureWord w);
 
 /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
 #define CPUID_7_0_EBX_FSGSBASE  (1U << 0)
+/* Support TSC adjust MSR */
+#define CPUID_7_0_EBX_TSC_ADJUST(1U << 1)
 /* Support SGX */
 #define CPUID_7_0_EBX_SGX   (1U << 2)
 /* 1st Group of Advanced Bit Manipulation Extensions */
@@ -1002,6 +1004,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, 
FeatureWord w);
 #define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON(1U << 17)
 /* Speculative Store Bypass Disable */
 #define CPUID_8000_0008_EBX_AMD_SSBD(1U << 24)
+/* Paravirtualized Speculative Store Bypass Disable MSR */
+#define CPUID_8000_0008_EBX_VIRT_SSBD   (1U << 25)
 /* Predictive Store Forwarding Disable */
 #define CPUID_8000_0008_EBX_AMD_PSFD(1U << 28)
 
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 2f3dbe289f4..2ba5f517228 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -945,6 +945,38 @@ out:
 return ret;
 }
 
+static uint32_t
+sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, 
uint32_t index,
+int reg, uint32_t value)
+{
+switch (feature) {
+case 1:
+if (reg == R_ECX) {
+return value & ~CPUID_EXT_TSC_DEADLINE_TIMER;
+}
+break;
+case 7:
+if (index == 0 && reg == R_EBX) {
+return value & ~CPUID_7_0_EBX_TSC_ADJUST;
+}
+if (index == 0 && reg == R_EDX) {
+return value & ~(CPUID_7_0_EDX_SPEC_CTRL |
+ CPUID_7_0_EDX_STIBP |
+ CPUID_7_0_EDX_FLUSH_L1D |
+ CPUID_7_0_EDX_ARCH_CAPABILITIES |
+ CPUID_7_0_EDX_CORE_CAPABILITY |
+ CPUID_7_0_EDX_SPEC_CTRL_SSBD);
+}
+break;
+case 0x8008:
+if (reg == R_EBX) {
+return value & ~CPUID_8000_0008_EBX_VIRT_SSBD;
+}
+break;
+}
+return value;
+}
+
 static int
 sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa,
uint8_t *addr, size_t len)
@@ -2315,6 +2347,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
 klass->launch_finish = sev_snp_launch_finish;
 klass->launch_update_data = sev_snp_launch_update_data;
 klass->kvm_init = sev_snp_kvm_init;
+x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features;
 x86_klass->kvm_type = sev_snp_kvm_type;
 
 object_class_property_add(oc, "policy", "uint64",
-- 
2.45.2

[PULL 10/16] target/i386: do not include undefined bits in the AMD topoext leaf

2024-07-04 Thread Paolo Bonzini

Commit d7c72735f61 ("target/i386: Add new EPYC CPU versions with updated
cache_info", 2023-05-08) ensured that AMD-defined CPU models did not
have the 'complex_indexing' bit set, but left it set in "-cpu host"
which uses the default ("legacy") cache information.

Reimplement that commit using a CPU feature, so that it can be applied
to all guests using a new machine type, independent of the CPU model.

Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.h | 3 +++
 hw/i386/pc.c  | 1 +
 target/i386/cpu.c | 4 
 3 files changed, 8 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 9bea7142bf4..0d5624355e4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2104,6 +2104,9 @@ struct ArchCPU {
 /* Only advertise CPUID leaves defined by the vendor */
 bool vendor_cpuid_only;
 
+/* Only advertise TOPOEXT features that AMD defines */
+bool amd_topoext_features_only;
+
 /* Enable auto level-increase for Intel Processor Trace leave */
 bool intel_pt_auto_level;
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 77415064c62..5dff91422ff 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -80,6 +80,7 @@
 { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
 
 GlobalProperty pc_compat_9_0[] = {
+{ TYPE_X86_CPU, "x-amd-topoext-features-only", "false" },
 { TYPE_X86_CPU, "x-l1-cache-per-thread", "false" },
 { TYPE_X86_CPU, "guest-phys-bits", "0" },
 { "sev-guest", "legacy-vm-type", "true" },
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 5e5bf71702c..c40551d9bfb 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6982,6 +6982,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *eax = *ebx = *ecx = *edx = 0;
 break;
 }
+if (cpu->amd_topoext_features_only) {
+*edx &= CACHE_NO_INVD_SHARING | CACHE_INCLUSIVE;
+}
 break;
 case 0x801E:
 if (cpu->core_id <= 255) {
@@ -8293,6 +8296,7 @@ static Property x86_cpu_properties[] = {
 DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor),
 DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true),
 DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true),
+DEFINE_PROP_BOOL("x-amd-topoext-features-only", X86CPU, 
amd_topoext_features_only, true),
 DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false),
 DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true),
 DEFINE_PROP_BOOL("kvm-pv-enforce-cpuid", X86CPU, kvm_pv_enforce_cpuid,
-- 
2.45.2

[PULL 08/16] target/i386: drop AMD machine check bits from Intel CPUID

2024-07-04 Thread Paolo Bonzini

The recent addition of the SUCCOR bit to kvm_arch_get_supported_cpuid()
causes the bit to be visible when "-cpu host" VMs are started on Intel
processors.

While this should in principle be harmless, it's not tidy and we don't
even know for sure that it doesn't cause any guest OS to take unexpected
paths.  Since x86_cpu_get_supported_feature_word() can return different
different values depending on the guest, adjust it to hide the SUCCOR
bit if the guest has non-AMD vendor.

Suggested-by: Xiaoyao Li 
Cc: John Allen 
Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.c | 23 +++
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 4364cb0f8e3..5e5bf71702c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6039,6 +6039,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, 
FeatureWord w)
 {
 FeatureWordInfo *wi = &feature_word_info[w];
 uint64_t r = 0;
+uint32_t unavail = 0;
 
 if (kvm_enabled()) {
 switch (wi->type) {
@@ -6064,19 +6065,33 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU 
*cpu, FeatureWord w)
 } else {
 return ~0;
 }
+
+switch (w) {
 #ifndef TARGET_X86_64
-if (w == FEAT_8000_0001_EDX) {
+case FEAT_8000_0001_EDX:
 /*
  * 32-bit TCG can emulate 64-bit compatibility mode.  If there is no
  * way for userspace to get out of its 32-bit jail, we can leave
  * the LM bit set.
  */
-uint32_t unavail = tcg_enabled()
+unavail = tcg_enabled()
 ? CPUID_EXT2_LM & ~CPUID_EXT2_KERNEL_FEATURES
 : CPUID_EXT2_LM;
-r &= ~unavail;
-}
+break;
 #endif
+
+case FEAT_8000_0007_EBX:
+if (cpu && !IS_AMD_CPU(&cpu->env)) {
+/* Disable AMD machine check architecture for Intel CPU.  */
+unavail = ~0;
+}
+break;
+
+default:
+break;
+}
+
+r &= ~unavail;
 if (cpu && cpu->migratable) {
 r &= x86_cpu_get_migratable_flags(w);
 }
-- 
2.45.2

[PULL 14/16] char-stdio: Restore blocking mode of stdout on exit

2024-07-04 Thread Paolo Bonzini

From: Maxim Mikityanskiy 

qemu_chr_open_fd() sets stdout into non-blocking mode. Restore the old
fd flags on exit to avoid breaking unsuspecting applications that run on
the same terminal after qemu and don't expect to get EAGAIN.

While at at, also ensure term_exit is called once (at the moment it's
called both from char_stdio_finalize() and as the atexit() hook.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2423
Signed-off-by: Maxim Mikityanskiy 
Link: https://lore.kernel.org/r/20240703190812.3459514-1-maxtra...@gmail.com
Signed-off-by: Paolo Bonzini 
---
 chardev/char-stdio.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/chardev/char-stdio.c b/chardev/char-stdio.c
index 3c648678ab1..b960ddd4e4c 100644
--- a/chardev/char-stdio.c
+++ b/chardev/char-stdio.c
@@ -41,6 +41,7 @@
 /* init terminal so that we can grab keys */
 static struct termios oldtty;
 static int old_fd0_flags;
+static int old_fd1_flags;
 static bool stdio_in_use;
 static bool stdio_allow_signal;
 static bool stdio_echo_state;
@@ -50,6 +51,8 @@ static void term_exit(void)
 if (stdio_in_use) {
 tcsetattr(0, TCSANOW, &oldtty);
 fcntl(0, F_SETFL, old_fd0_flags);
+fcntl(1, F_SETFL, old_fd1_flags);
+stdio_in_use = false;
 }
 }
 
@@ -102,6 +105,7 @@ static void qemu_chr_open_stdio(Chardev *chr,
 
 stdio_in_use = true;
 old_fd0_flags = fcntl(0, F_GETFL);
+old_fd1_flags = fcntl(1, F_GETFL);
 tcgetattr(0, &oldtty);
 if (!g_unix_set_fd_nonblocking(0, true, NULL)) {
 error_setg_errno(errp, errno, "Failed to set FD nonblocking");
-- 
2.45.2

[PULL 13/16] target/i386: add avx-vnni-int16 feature

2024-07-04 Thread Paolo Bonzini

AVX-VNNI-INT16 (CPUID[EAX=7,ECX=1).EDX[10]) is supported by Clearwater
Forest processor, add it to QEMU as it does not need any specific
enablement.

Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c40551d9bfb..c05765eeafc 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1131,7 +1131,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
 .feat_names = {
 NULL, NULL, NULL, NULL,
 "avx-vnni-int8", "avx-ne-convert", NULL, NULL,
-"amx-complex", NULL, NULL, NULL,
+"amx-complex", NULL, "avx-vnni-int16", NULL,
 NULL, NULL, "prefetchiti", NULL,
 NULL, NULL, NULL, NULL,
 NULL, NULL, NULL, NULL,
-- 
2.45.2

[PULL 07/16] target/i386: pass X86CPU to x86_cpu_get_supported_feature_word

2024-07-04 Thread Paolo Bonzini

This allows modifying the bits in "-cpu max"/"-cpu host" depending on
the guest CPU vendor (which, at least by default, is the host vendor in
the case of KVM).

For example, machine check architecture differs between Intel and AMD,
and bits from AMD should be dropped when configuring the guest for
an Intel model.

Cc: Xiaoyao Li 
Cc: John Allen 
Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.h |  3 +--
 target/i386/cpu.c | 11 +--
 target/i386/kvm/kvm-cpu.c |  2 +-
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 29daf370485..9bea7142bf4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -666,8 +666,7 @@ typedef enum FeatureWord {
 } FeatureWord;
 
 typedef uint64_t FeatureWordArray[FEATURE_WORDS];
-uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
-bool migratable_only);
+uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
 
 /* cpuid_features bits */
 #define CPUID_FP87 (1U << 0)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 4c2e6f3a71e..4364cb0f8e3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6035,8 +6035,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error 
**errp)
 
 #endif /* !CONFIG_USER_ONLY */
 
-uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
-bool migratable_only)
+uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w)
 {
 FeatureWordInfo *wi = &feature_word_info[w];
 uint64_t r = 0;
@@ -6078,7 +6077,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 r &= ~unavail;
 }
 #endif
-if (migratable_only) {
+if (cpu && cpu->migratable) {
 r &= x86_cpu_get_migratable_flags(w);
 }
 return r;
@@ -7371,7 +7370,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
  * by the user.
  */
 env->features[w] |=
-x86_cpu_get_supported_feature_word(w, cpu->migratable) &
+x86_cpu_get_supported_feature_word(cpu, w) &
 ~env->user_features[w] &
 ~feature_word_info[w].no_autoenable_flags;
 }
@@ -7497,7 +7496,7 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool 
verbose)
 
 for (w = 0; w < FEATURE_WORDS; w++) {
 uint64_t host_feat =
-x86_cpu_get_supported_feature_word(w, false);
+x86_cpu_get_supported_feature_word(NULL, w);
 uint64_t requested_features = env->features[w];
 uint64_t unavailable_features = requested_features & ~host_feat;
 mark_unavailable_features(cpu, w, unavailable_features, prefix);
@@ -7617,7 +7616,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error 
**errp)
 env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_LBR_FMT;
 if (requested_lbr_fmt && kvm_enabled()) {
 uint64_t host_perf_cap =
-x86_cpu_get_supported_feature_word(FEAT_PERF_CAPABILITIES, false);
+x86_cpu_get_supported_feature_word(NULL, FEAT_PERF_CAPABILITIES);
 unsigned host_lbr_fmt = host_perf_cap & PERF_CAP_LBR_FMT;
 
 if (!cpu->enable_pmu) {
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index d57a68a301e..6bf8dcfc607 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -143,7 +143,7 @@ static void kvm_cpu_xsave_init(void)
 if (!esa->size) {
 continue;
 }
-if ((x86_cpu_get_supported_feature_word(esa->feature, false) & 
esa->bits)
+if ((x86_cpu_get_supported_feature_word(NULL, esa->feature) & 
esa->bits)
 != esa->bits) {
 continue;
 }
-- 
2.45.2

[PULL 09/16] target/i386: SEV: fix formatting of CPUID mismatch message

2024-07-04 Thread Paolo Bonzini

Fixes: 70943ad8e4d ("i386/sev: Add support for SNP CPUID validation", 
2024-06-05)
Signed-off-by: Paolo Bonzini 
---
 target/i386/sev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 3ab8b3c28b7..2a0f94d390d 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -841,7 +841,7 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
 size_t i;
 
 if (old->count != new->count) {
-error_report("SEV-SNP: CPUID validation failed due to count mismatch,"
+error_report("SEV-SNP: CPUID validation failed due to count mismatch, "
  "provided: %d, expected: %d", old->count, new->count);
 return;
 }
@@ -853,8 +853,8 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
 new_func = &new->entries[i];
 
 if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) {
-error_report("SEV-SNP: CPUID validation failed for function 0x%x, 
index: 0x%x"
- "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 
0x%08x"
+error_report("SEV-SNP: CPUID validation failed for function 0x%x, 
index: 0x%x, "
+ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 
0x%08x, "
  "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 
0x%08x",
  old_func->eax_in, old_func->ecx_in,
  old_func->eax, old_func->ebx, old_func->ecx, 
old_func->edx,
-- 
2.45.2

[PULL 00/16] meson, i386 changes for 2024-07-04

2024-07-04 Thread Paolo Bonzini

The following changes since commit 1a2d52c7fcaeaaf4f2fe8d4d5183dccaeab67768:

  Merge tag 'pull-request-2024-07-02' of https://gitlab.com/thuth/qemu into 
staging (2024-07-02 15:49:08 -0700)

are available in the Git repository at:

  https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to 188569c10d5dc6996bde90ce25645083e9661ecb:

  target/i386/SEV: implement mask_cpuid_features (2024-07-04 11:56:20 +0200)


* meson: Pass objects and dependencies to declare_dependency(), not 
static_library()
* meson: Drop the .fa library suffix
* target/i386: drop AMD machine check bits from Intel CPUID
* target/i386: add avx-vnni-int16 feature
* target/i386: SEV bugfixes
* target/i386: SEV-SNP -cpu host support
* char: fix exit issues


Akihiko Odaki (2):
  meson: Pass objects and dependencies to declare_dependency()
  Revert "meson: Propagate gnutls dependency"

Maxim Mikityanskiy (1):
  char-stdio: Restore blocking mode of stdout on exit

Michal Privoznik (2):
  i386/sev: Fix error message in sev_get_capabilities()
  i386/sev: Fallback to the default SEV device if none provided in 
sev_get_capabilities()

Paolo Bonzini (11):
  meson: move shared_module() calls where modules are already walked
  meson: move block.syms dependency out of libblock
  meson: merge plugin_ldflags into emulator_link_args
  meson: Drop the .fa library suffix
  target/i386: pass X86CPU to x86_cpu_get_supported_feature_word
  target/i386: drop AMD machine check bits from Intel CPUID
  target/i386: SEV: fix formatting of CPUID mismatch message
  target/i386: do not include undefined bits in the AMD topoext leaf
  target/i386: add avx-vnni-int16 feature
  target/i386: add support for masking CPUID features in confidential guests
  target/i386/SEV: implement mask_cpuid_features

 docs/devel/build-system.rst |   8 +--
 meson.build | 100 ++--
 target/i386/confidential-guest.h|  24 +
 target/i386/cpu.h   |  10 +++-
 chardev/char-stdio.c|   4 ++
 hw/i386/pc.c|   1 +
 stubs/blk-exp-close-all.c   |   2 +-
 target/i386/cpu.c   |  40 +++
 target/i386/kvm/kvm-cpu.c   |   2 +-
 target/i386/kvm/kvm.c   |   5 ++
 target/i386/sev.c   |  51 ++
 .gitlab-ci.d/buildtest-template.yml |   2 -
 .gitlab-ci.d/buildtest.yml  |   2 -
 block/meson.build   |   2 +-
 gdbstub/meson.build |   6 +--
 io/meson.build  |   2 +-
 plugins/meson.build |   7 ++-
 pythondeps.toml |   2 +-
 storage-daemon/meson.build  |   3 +-
 tcg/meson.build |   8 +--
 tests/Makefile.include  |   2 +-
 tests/qtest/libqos/meson.build  |   3 +-
 ui/meson.build  |   2 +-
 23 files changed, 183 insertions(+), 105 deletions(-)
-- 
2.45.2

[PULL 15/16] target/i386: add support for masking CPUID features in confidential guests

2024-07-04 Thread Paolo Bonzini

Some CPUID features may be provided by KVM for some guests, independent of
processor support, for example TSC deadline or TSC adjust.  If these are
not supported by the confidential computing firmware, however, the guest
will fail to start.  Add support for removing unsupported features from
"-cpu host".

Signed-off-by: Paolo Bonzini 
---
 target/i386/confidential-guest.h | 24 
 target/i386/kvm/kvm.c|  5 +
 2 files changed, 29 insertions(+)

diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
index 532e172a60b..7342d2843aa 100644
--- a/target/i386/confidential-guest.h
+++ b/target/i386/confidential-guest.h
@@ -39,6 +39,8 @@ struct X86ConfidentialGuestClass {
 
 /*  */
 int (*kvm_type)(X86ConfidentialGuest *cg);
+uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t 
feature, uint32_t index,
+int reg, uint32_t value);
 };
 
 /**
@@ -56,4 +58,26 @@ static inline int 
x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg)
 return 0;
 }
 }
+
+/**
+ * x86_confidential_guest_mask_cpuid_features:
+ *
+ * Removes unsupported features from a confidential guest's CPUID values, 
returns
+ * the value with the bits removed.  The bits removed should be those that KVM
+ * provides independent of host-supported CPUID features, but are not 
supported by
+ * the confidential computing firmware.
+ */
+static inline int 
x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg,
+ uint32_t feature, 
uint32_t index,
+ int reg, uint32_t 
value)
+{
+X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+if (klass->mask_cpuid_features) {
+return klass->mask_cpuid_features(cg, feature, index, reg, value);
+} else {
+return value;
+}
+}
+
 #endif
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index dd8b0f33136..056d117cd11 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -548,6 +548,11 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, 
uint32_t function,
 ret |= 1U << KVM_HINTS_REALTIME;
 }
 
+if (current_machine->cgs) {
+ret = x86_confidential_guest_mask_cpuid_features(
+X86_CONFIDENTIAL_GUEST(current_machine->cgs),
+function, index, reg, ret);
+}
 return ret;
 }
 
-- 
2.45.2

Re: [PATCH 0/3] plugins: Few debugging cleanups

2024-07-04 Thread Alex Bennée

Philippe Mathieu-Daudé  writes:

> - Assert cpu_index is assigned in INIT/EXIT hooks
> - Free cpu->plugin_state
> - Restrict qemu_plugin_vcpu_init__async() to plugins/
>
> Philippe Mathieu-Daudé (3):
>   plugins: Ensure vCPU index is assigned in init/exit hooks
>   plugins: Free CPUPluginState before destroying vCPU state
>   accel/tcg: Move qemu_plugin_vcpu_init__async() to plugins/
>
>  include/qemu/plugin.h |  3 +++
>  hw/core/cpu-common.c  | 14 ++
>  plugins/core.c| 10 +-
>  3 files changed, 18 insertions(+), 9 deletions(-)

Queued to plugins/next, thanks.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH v7] virtio-net: Fix network stall at the host side waiting for kick

2024-07-04 Thread Yang Dongshan

Hi, Michael

> My suggestion:
> 
> 
> change virtqueue_get_avail_bytes to return the shadow
> in an opaque unsigned value.
> 
> 
> add virtqueue_poll that gets this opaque and tells us whether any new
> buffers became available in the queue since that value> 
> was returned.


> accordingly, virtio_queue_set_notification_and_check
> will accept this opaque value and check avail buffers
> against it.

According to your suggestion, it's able to handle the case where the
queue is not empty, when the queue is empty, should I add an API to
get the shadow idx as virtio_queue_set_notification_and_check()
needs the opaque arg.

What value should return from virtqueue_get_avail_bytes() in case of
error branch in the function? 

On 2024/7/2, 19:27, "Michael S. Tsirkin" mailto:m...@redhat.com>> wrote:


On Tue, Jul 02, 2024 at 07:45:31AM +0800, Yang Dongshan wrote:
> > what does "changed" mean here? changed compared to what?
> For a split queue, if the shadow_avail_idx synced from avail ring idx
> by vring_avail_idx(vq) last time doesn't equal the current value of avail ring
> idx.
> 
> vq->shadow_avail_idx != vring_avail_idx(vq);
> 
> For packed queue, the logic is similar, if vq->shadow_avail_idx
> 
> becomes available, it means the guest has added buf at the slot.
> 
> vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
> 
> vq->shadow_avail_idx, true);
> 
> if (is_desc_avail(desc.flags, vq-> shadow_avail_wrap_counter))
> 
> return true;
> 
> 


This answer does not make sense from API POV.


My suggestion:


change virtqueue_get_avail_bytes to return the shadow
in an opaque unsigned value.


add virtqueue_poll that gets this opaque and tells us whether any new
buffers became available in the queue since that value
was returned.


accordingly, virtio_queue_set_notification_and_check
will accept this opaque value and check avail buffers
against it.






> On Tue, Jul 2, 2024 at 2:46 AM Michael S. Tsirkin  > wrote:
> 
> On Tue, Jul 02, 2024 at 01:18:15AM +0800, Yang Dongshan wrote:
> > > Please document what this does.
> > okay, i will.
> >
> > > So this will return false if ring has any available buffers?
> > > Equivalent to:
> > > 
> > > bool virtio_queue_set_notification_and_check(VirtQueue *vq, int enable)
> > > {
> > > virtio_queue_packed_set_notification(vq, enable);
> > > return virtio_queue_empty(vq);
> > > }
> >
> > No, only when the shadow_avail_idx is changed shall the function return
> true,
> 
> 
> what does "changed" mean here? changed compared to what?
> 
> > compared with the value seen by the host last time, else return false
> even if
> > there are some buffers available in the queue, as the total size of the
> > available
> > buffers in the queue can't satisfy the request.
> >
> > It maybe better to pass only one arg to the function like this:
> > bool virtio_queue_set_notification_and_check(VirtQueue *vq)
> > {
> > virtio_queue_packed_set_notification(vq, true);
> > 
> > return shadow_avail_idx_changed()? true: false;
> > }
> >
> > Thanks Michael a lot!
> >
> > On Mon, Jul 1, 2024 at 11:05 PM Michael S. Tsirkin  > >
> wrote:
> >
> > On Mon, Jul 01, 2024 at 10:00:17PM +0800, Wencheng Yang wrote:
> > > From: thomas  > > >
> > >
> > > Patch 06b12970174 ("virtio-net: fix network stall under load")
> > > added double-check to test whether the available buffer size
> > > can satisfy the request or not, in case the guest has added
> > > some buffers to the avail ring simultaneously after the first
> > > check. It will be lucky if the available buffer size becomes
> > > okay after the double-check, then the host can send the packet
> > > to the guest. If the buffer size still can't satisfy the request,
> > > even if the guest has added some buffers, viritio-net would
> > > stall at the host side forever.
> > >
> > > The patch checks whether the guest has added some buffers
> > > after last check of avail idx when the available buffers are
> > > not sufficient, if so then recheck the available buffers
> > > in the loop.
> > >
> > > The patch also reverts patch "06b12970174".
> > >
> > > The case below can reproduce the stall.
> > >
> > > Guest 0
> > > ++
> > > | iperf |
> > > ---> | server |
> > > Host | ++
> > > ++ | ...
> > > | iperf |
> > > | client | Guest n
> > > ++ | ++
> > > | | iperf |
> > > ---> | server |
> > > ++
> > >
> > > Boot many guests from qemu with virtio network:
> > > qemu ... -netdev tap,id=net_x \
> > > -device virtio-net-pci-non-transitional,\
> > > iommu_platform=on,mac=xx:xx:xx:xx:xx:xx,netdev=net_x
> > >
> > > Each guest acts as iperf server with commands below:
> > > iperf3 -s -D -i 10 -p 8001
> > > iperf3 -s -D -i 10 -p 8002
> > >
> > > The host as iperf client:
> > > iperf3 -c guest_IP -p 8001 -i 30 -w 256k -P 20 -t 4
> > > iperf3 -c guest_IP -p 8002 -i 30 -w 256k -P 20 -t 4
> > >
> > > After some time

Re: [PATCH v1 1/2] physmem: Bail out qemu_ram_block_from_host() for invalid ram addrs

2024-07-04 Thread Alex Bennée

"Edgar E. Iglesias"  writes:

> From: "Edgar E. Iglesias" 
>
> Bail out in qemu_ram_block_from_host() when
> xen_ram_addr_from_mapcache() does not find an existing
> mapping.
>
> Signed-off-by: Edgar E. Iglesias 
> ---
>  system/physmem.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/system/physmem.c b/system/physmem.c
> index 33d09f7571..59d1576c2b 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -2277,6 +2277,10 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool 
> round_offset,
>  ram_addr_t ram_addr;
>  RCU_READ_LOCK_GUARD();
>  ram_addr = xen_ram_addr_from_mapcache(ptr);
> +if (ram_addr == RAM_ADDR_INVALID) {
> +return NULL;
> +}
> +

Isn't this indicative of a failure? Should there at least be a trace
point for failed mappings?

>  block = qemu_get_ram_block(ram_addr);
>  if (block) {
>  *offset = ram_addr - block->offset;

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH 2/2] system/vl.c: parse all -accel options

2024-07-04 Thread Alex Bennée

Paolo Bonzini  writes:

> On Mon, Jul 1, 2024 at 4:34 PM Philippe Mathieu-Daudé  
> wrote:
>> Reviewed-by: Philippe Mathieu-Daudé 
>
> In principle, a Reviewed-by tag is just stating that you don't know of
> any issues that would prevent the patch being included. However, as a
> frequent participant to the project, your Reviewed-by tag carries some
> weight and, to some extent, it is also a statement that you understand
> the area being modified.  A Reviewed-by from an experienced
> contributor may even imply that you could take the patch in one of
> your pull requests. (*) That makes it even more important to
> understand the area.

I think you are attaching a little too much weight to a r-b tag here as
no one was suggesting the patch go in via a different tree. Ultimately
the maintainer can always NACK a reviewed patch. 

> I would expect that anyone with an understanding of command line
> parsing would know 1) what -accel kvm -accel tcg does, and 2) what
> .merge_lists does; and this would be enough to flag an issue
> preventing the patch from being included.

Maybe more useful would be re-wording the comment:

  /* Merge multiple uses of option into a single list? */

to be explicit about its behaviour. 

> To be clear, I don't expect reviews to be perfect. But in this case
> I'm speaking up because the patch is literally a one line declarative
> change, and the only way to say "I've reviewed it" is by understanding
> the deeper effects of that line.

I think that's a fairly subjective requirement for something that
generally we can always use more of. I encourage people to review all
around the code base to get familiar with new sub-systems. I don't think
we should be dissuading people from exploring outside their silos. That
simple one liners can trip people up says more about the code than the
reviewer.

I sympathise with Philippe here who's current brief takes him around our
large and interconnected code base more than most.

>
> Also, I think it's fair that the submitter didn't spot the problem;
> it's okay to send out broken patches, that's part of the learning
> experience. :)
>
> Paolo
>
> (*) as opposed to Acked-by, where your review probably has been more
> conceptual than technical, and that you don't really want to take the
> patch in a pull request.
>
>
> Paolo

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PATCH] hw/loongarch/boot.c: fix out-of-bound reading

2024-07-04 Thread gaosong


在 2024/6/28 下午8:39, Dmitry Frolov 写道:

memcpy() is trying to READ 512 bytes from memory,
pointed by info->kernel_cmdline,
which was (presumable) allocated by g_strdup("");
Found with ASAN, making check with enabled sanitizers.

Signed-off-by: Dmitry Frolov 
---
  hw/loongarch/boot.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c
index b8e1aa18d5..cb668703bd 100644
--- a/hw/loongarch/boot.c
+++ b/hw/loongarch/boot.c
@@ -163,7 +163,7 @@ static void init_cmdline(struct loongarch_boot_info *info, 
void *p, void *start)
  info->a0 = 1;
  info->a1 = cmdline_addr;
  
-memcpy(p, info->kernel_cmdline, COMMAND_LINE_SIZE);

+g_strlcpy(p, info->kernel_cmdline, COMMAND_LINE_SIZE);
  }
  
  static uint64_t cpu_loongarch_virt_to_phys(void *opaque, uint64_t addr)

Reviewed-by: Song Gao 

Thanks.
Song Gao

Re: [PATCH] spapr: Migrate ail-mode-3 spapr cap

2024-07-04 Thread Nicholas Piggin

On Mon Jul 1, 2024 at 5:10 PM AEST, Michael Tokarev wrote:
> 06.06.2024 14:26, Michael Tokarev wrote:
> > 06.05.2024 14:56, Nicholas Piggin wrote:
> >> This cap did not add the migration code when it was introduced. This
> >> results in migration failure when changing the default using the
> >> command line.
> >>
> >> Cc: qemu-sta...@nongnu.org
> >> Fixes: ccc5a4c5e10 ("spapr: Add SPAPR_CAP_AIL_MODE_3 for AIL mode 3 
> >> support for H_SET_MODE hcall")
> >> Signed-off-by: Nicholas Piggin 
> >> ---
> >>   include/hw/ppc/spapr.h | 1 +
> >>   hw/ppc/spapr.c | 1 +
> >>   hw/ppc/spapr_caps.c    | 1 +
> >>   3 files changed, 3 insertions(+)
> > 
> > Hi!
> > 
> > Has this change been forgotten?  It's aimed at -stable, so must be
> > fixing a real issue.
>
> Ping #2 ? :)

Ah sorry, I meant to deal with this before. But yes I did forget it in
the last PR and it does fix a migration issue. Thanks for the reminder
of it.

I'll send it in  soon.

Thanks,
Nick

[PATCH] i386/cpu: Drop the check of phys_bits in host_cpu_realizefn()

2024-07-04 Thread Xiaoyao Li

The check of cpu->phys_bits to be in range between
[32, TARGET_PHYS_ADDR_SPACE_BITS] in host_cpu_realizefn()
is duplicated with check in x86_cpu_realizefn().

Since the ckeck in x86_cpu_realizefn() is called later and can cover all
teh x86 case. Remove the one in host_cpu_realizefn().

Signed-off-by: Xiaoyao Li 
---
 target/i386/host-cpu.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index 8b8bf5afeccf..b109c1a2221f 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -75,17 +75,7 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp)
 CPUX86State *env = &cpu->env;
 
 if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
-uint32_t phys_bits = host_cpu_adjust_phys_bits(cpu);
-
-if (phys_bits &&
-(phys_bits > TARGET_PHYS_ADDR_SPACE_BITS ||
- phys_bits < 32)) {
-error_setg(errp, "phys-bits should be between 32 and %u "
-   " (but is %u)",
-   TARGET_PHYS_ADDR_SPACE_BITS, phys_bits);
-return false;
-}
-cpu->phys_bits = phys_bits;
+cpu->phys_bits = host_cpu_adjust_phys_bits(cpu);
 }
 return true;
 }
-- 
2.34.1

Re: [PULL 02/12] tests/qtest/migration-test: enable on s390x with TCG

2024-07-04 Thread Nicholas Piggin

On Tue Jul 2, 2024 at 8:33 PM AEST, Thomas Huth wrote:
> From: Nicholas Piggin 
>
> s390x with TCG is more stable now. Enable it.

Ah, you did a more complete version of my flic fix that migrates all the
state. I didn't see that go by but yeah I suspect that was probably the
correct thing to do. Thanks for that.

Should the s390x flic migrate fix could be got to stable, perhaps?

There's some kvm-unit-tests s390x migration tests that can be enabled
after the fix too don't forget.

Thanks,
Nick

>
> Signed-off-by: Nicholas Piggin 
> Message-Id: <20240525131241.378473-3-npig...@gmail.com>
> Reviewed-by: Prasad Pandit 
> [thuth: Added "with TCG" to the commit message]
> Signed-off-by: Thomas Huth 
> ---
>  tests/qtest/migration-test.c | 12 
>  1 file changed, 12 deletions(-)
>
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 571fc1334c..70b606b888 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -3823,16 +3823,6 @@ int main(int argc, char **argv)
> test_vmstate_checker_script);
>  #endif
>  
> -/*
> - * On s390x with TCG, migration is observed to hang due to the 'pending'
> - * state of the flic interrupt controller not being migrated or
> - * reconstructed post-migration. Disable it until the problem is 
> resolved.
> - */
> -if (g_str_equal(arch, "s390x") && !has_kvm) {
> -g_test_message("Skipping tests: s390x host with KVM is required");
> -goto test_add_done;
> -}
> -
>  if (is_x86) {
>  migration_test_add("/migration/precopy/unix/suspend/live",
> test_precopy_unix_suspend_live);
> @@ -4036,8 +4026,6 @@ int main(int argc, char **argv)
> test_vcpu_dirty_limit);
>  }
>  
> -test_add_done:
> -
>  ret = g_test_run();
>  
>  g_assert_cmpint(ret, ==, 0);

Re: [PATCH v7] virtio-net: Fix network stall at the host side waiting for kick

2024-07-04 Thread Michael S. Tsirkin

On Thu, Jul 04, 2024 at 10:20:15AM +, Yang Dongshan wrote:
> Hi, Michael
> 
> > My suggestion:
> > 
> > 
> > change virtqueue_get_avail_bytes to return the shadow
> > in an opaque unsigned value.
> > 
> > 
> > add virtqueue_poll that gets this opaque and tells us whether any new
> > buffers became available in the queue since that value> 
> > was returned.
> 
> 
> > accordingly, virtio_queue_set_notification_and_check
> > will accept this opaque value and check avail buffers
> > against it.
> 
> According to your suggestion, it's able to handle the case where the
> queue is not empty, when the queue is empty, should I add an API to
> get the shadow idx as virtio_queue_set_notification_and_check()
> needs the opaque arg.

virtqueue_get_avail_bytes would always return the opaque.

> What value should return from virtqueue_get_avail_bytes() in case of
> error branch in the function? 

One way would be to make opaque int, return a negative value on error,
positive on success.


> On 2024/7/2, 19:27, "Michael S. Tsirkin"  > wrote:
> 
> 
> On Tue, Jul 02, 2024 at 07:45:31AM +0800, Yang Dongshan wrote:
> > > what does "changed" mean here? changed compared to what?
> > For a split queue, if the shadow_avail_idx synced from avail ring idx
> > by vring_avail_idx(vq) last time doesn't equal the current value of avail 
> > ring
> > idx.
> > 
> > vq->shadow_avail_idx != vring_avail_idx(vq);
> > 
> > For packed queue, the logic is similar, if vq->shadow_avail_idx
> > 
> > becomes available, it means the guest has added buf at the slot.
> > 
> > vring_packed_desc_read(vq->vdev, &desc, &caches->desc,
> > 
> > vq->shadow_avail_idx, true);
> > 
> > if (is_desc_avail(desc.flags, vq-> shadow_avail_wrap_counter))
> > 
> > return true;
> > 
> > 
> 
> 
> This answer does not make sense from API POV.
> 
> 
> My suggestion:
> 
> 
> change virtqueue_get_avail_bytes to return the shadow
> in an opaque unsigned value.
> 
> 
> add virtqueue_poll that gets this opaque and tells us whether any new
> buffers became available in the queue since that value
> was returned.
> 
> 
> accordingly, virtio_queue_set_notification_and_check
> will accept this opaque value and check avail buffers
> against it.
> 
> 
> 
> 
> 
> 
> > On Tue, Jul 2, 2024 at 2:46 AM Michael S. Tsirkin  > > wrote:
> > 
> > On Tue, Jul 02, 2024 at 01:18:15AM +0800, Yang Dongshan wrote:
> > > > Please document what this does.
> > > okay, i will.
> > >
> > > > So this will return false if ring has any available buffers?
> > > > Equivalent to:
> > > > 
> > > > bool virtio_queue_set_notification_and_check(VirtQueue *vq, int enable)
> > > > {
> > > > virtio_queue_packed_set_notification(vq, enable);
> > > > return virtio_queue_empty(vq);
> > > > }
> > >
> > > No, only when the shadow_avail_idx is changed shall the function return
> > true,
> > 
> > 
> > what does "changed" mean here? changed compared to what?
> > 
> > > compared with the value seen by the host last time, else return false
> > even if
> > > there are some buffers available in the queue, as the total size of the
> > > available
> > > buffers in the queue can't satisfy the request.
> > >
> > > It maybe better to pass only one arg to the function like this:
> > > bool virtio_queue_set_notification_and_check(VirtQueue *vq)
> > > {
> > > virtio_queue_packed_set_notification(vq, true);
> > > 
> > > return shadow_avail_idx_changed()? true: false;
> > > }
> > >
> > > Thanks Michael a lot!
> > >
> > > On Mon, Jul 1, 2024 at 11:05 PM Michael S. Tsirkin  > > >
> > wrote:
> > >
> > > On Mon, Jul 01, 2024 at 10:00:17PM +0800, Wencheng Yang wrote:
> > > > From: thomas  > > > >
> > > >
> > > > Patch 06b12970174 ("virtio-net: fix network stall under load")
> > > > added double-check to test whether the available buffer size
> > > > can satisfy the request or not, in case the guest has added
> > > > some buffers to the avail ring simultaneously after the first
> > > > check. It will be lucky if the available buffer size becomes
> > > > okay after the double-check, then the host can send the packet
> > > > to the guest. If the buffer size still can't satisfy the request,
> > > > even if the guest has added some buffers, viritio-net would
> > > > stall at the host side forever.
> > > >
> > > > The patch checks whether the guest has added some buffers
> > > > after last check of avail idx when the available buffers are
> > > > not sufficient, if so then recheck the available buffers
> > > > in the loop.
> > > >
> > > > The patch also reverts patch "06b12970174".
> > > >
> > > > The case below can reproduce the stall.
> > > >
> > > > Guest 0
> > > > ++
> > > > | iperf |
> > > > ---> | server |
> > > > Host | ++
> > > > ++ | ...
> > > > | iperf |
> > > > | client | Guest n
> > > > ++ | ++
> > > > | | iperf |
> > > > ---> | server |
> > > > ++

RE: [PATCH RFC V3 13/29] arm/virt: Make ARM vCPU present status ACPI persistent

2024-07-04 Thread Salil Mehta via

HI Nick,

Thanks for taking time to review. Please find my replies inline.

>  From: Nicholas Piggin 
>  Sent: Thursday, July 4, 2024 3:49 AM
>  To: Salil Mehta ; qemu-devel@nongnu.org;
>  qemu-...@nongnu.org; m...@redhat.com
>  
>  On Fri Jun 14, 2024 at 9:36 AM AEST, Salil Mehta wrote:
>  > ARM arch does not allow CPUs presence to be changed [1] after kernel
>  has booted.
>  > Hence, firmware/ACPI/Qemu must ensure persistent view of the vCPUs  to
>  > the Guest kernel even when they are not present in the QoM i.e. are
>  > unplugged or are yet-to-be-plugged
>  
>  Do you need arch-independent state for this? If ARM always requires it
>  then can it be implemented between arm and acpi interface?


Yes, we do need as we cannot say if the same constraint applies to other
architectures as well. Above stated constraint affects how the architecture
common ACPI CPU code is initialized.


>  
>  If not, then perhaps could it be done in the patch that introduces all the
>  other state?
>  
>  > References:
>  > [1] Check comment 5 in the bugzilla entry
>  >Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5
>  
>  If I understand correctly (and I don't know ACPI, so it's likely I don't), 
> that is
>  and update to ACPI spec to say some bit in ACPI table must remain set
>  regardless of CPU hotplug state.


ARM does not claims anything related to CPU hotplug right now. It simply
does not exists. The ACPI update is simply reinforcing the existing fact that
_STA.Present bit in the ACPI spec cannot be changed after system has booted. 

This is  because for ARM arch there are many other initializations which depend
upon the exact availability of CPU count during boot and they do not expect
that to change after boot. For example, there are so many per-CPU features
and the GIC CPU interface etc. which all expect this to be fixed at boot time.
This is immutable requirement from ARM.


>  
>  Reference links are good, I think it would be nice to add a small summary in
>  the changelog too.

sure. I will do.

Thanks
Salil.

>  
>  Thanks,
>  Nick
>  
>  >
>  > Signed-off-by: Salil Mehta 
>  > ---
>  >  cpu-common.c  |  6 ++
>  >  hw/arm/virt.c |  7 +++
>  >  include/hw/core/cpu.h | 21 +
>  >  3 files changed, 34 insertions(+)
>  >
>  > diff --git a/cpu-common.c b/cpu-common.c index
>  49d2a50835..e4b4dee99a
>  > 100644
>  > --- a/cpu-common.c
>  > +++ b/cpu-common.c
>  > @@ -128,6 +128,12 @@ bool qemu_enabled_cpu(CPUState *cpu)
>  >  return cpu && !cpu->disabled;
>  >  }
>  >
>  > +bool qemu_persistent_cpu(CPUState *cpu) {
>  > +/* cpu state can be faked to the guest via acpi */
>  > +return cpu && cpu->acpi_persistent; }
>  > +
>  >  uint64_t qemu_get_cpu_archid(int cpu_index)  {
>  >  MachineState *ms = MACHINE(qdev_get_machine()); diff --git
>  > a/hw/arm/virt.c b/hw/arm/virt.c index 5f98162587..9d33f30a6a 100644
>  > --- a/hw/arm/virt.c
>  > +++ b/hw/arm/virt.c
>  > @@ -3016,6 +3016,13 @@ static void virt_cpu_pre_plug(HotplugHandler
>  *hotplug_dev, DeviceState *dev,
>  >  return;
>  >  }
>  >  virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp);
>  > +
>  > +/*
>  > + * To give persistent presence view of vCPUs to the guest, ACPI might
>  need
>  > + * to fake the presence of the vCPUs to the guest but keep them
>  disabled.
>  > + * This shall be used during the init of ACPI Hotplug state and hot-
>  unplug
>  > + */
>  > + cs->acpi_persistent = true;
>  >  }
>  >
>  >  static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState
>  > *dev, diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index
>  > 62e68611c0..e13e542177 100644
>  > --- a/include/hw/core/cpu.h
>  > +++ b/include/hw/core/cpu.h
>  > @@ -540,6 +540,14 @@ struct CPUState {
>  >   * every CPUState is enabled across all architectures.
>  >   */
>  >  bool disabled;
>  > +/*
>  > + * On certain architectures, to provide a persistent view of the
>  'presence'
>  > + * of vCPUs to the guest, ACPI might need to fake the 'presence' of 
> the
>  > + * vCPUs but keep them ACPI-disabled for the guest. This is achieved
>  by
>  > + * returning `_STA.PRES=True` and `_STA.Ena=False` for the unplugged
>  vCPUs
>  > + * in QEMU QoM.
>  > + */
>  > +bool acpi_persistent;
>  >
>  >  /* TODO Move common fields from CPUArchState here. */
>  >  int cpu_index;
>  > @@ -959,6 +967,19 @@ bool qemu_present_cpu(CPUState *cpu);
>  >   */
>  >  bool qemu_enabled_cpu(CPUState *cpu);
>  >
>  > +/**
>  > + * qemu_persistent_cpu:
>  > + * @cpu: The vCPU to check
>  > + *
>  > + * Checks if the vCPU state should always be reflected as *present*
>  > +via ACPI
>  > + * to the Guest. By default, this is False on all architectures and
>  > +has to be
>  > + * explicity set during initialization.
>  > + *
>  > + * Returns: True if it is ACPI 'persistent' CPU
>  > + *
>  > + */
>  > +bool qemu_persistent_cpu(C

[PATCH v2 2/4] target/i386: Add CPUID leaf 0xC000_0001 EDX definitions

2024-07-04 Thread EwanHai

Add new CPUID feature flags for various Zhaoxin PadLock extensions.
These definitions will be used for Zhaoxin CPU models.

Signed-off-by: EwanHai 
---
 target/i386/cpu.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 07e8353f36..935bf96451 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -989,6 +989,27 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 /* CPUID[0x8007].EDX flags: */
 #define CPUID_APM_INVTSC   (1U << 8)
 
+/* "rng" RNG present (xstore) */
+#define CPUID_C000_0001_EDX_XSTORE (1U << 2)
+/* "rng_en" RNG enabled */
+#define CPUID_C000_0001_EDX_XSTORE_EN  (1U << 3)
+/* "ace" on-CPU crypto (xcrypt) */
+#define CPUID_C000_0001_EDX_XCRYPT (1U << 6)
+/* "ace_en" on-CPU crypto enabled */
+#define CPUID_C000_0001_EDX_XCRYPT_EN  (1U << 7)
+/* Advanced Cryptography Engine v2 */
+#define CPUID_C000_0001_EDX_ACE2   (1U << 8)
+/* ACE v2 enabled */
+#define CPUID_C000_0001_EDX_ACE2_EN(1U << 9)
+/* PadLock Hash Engine */
+#define CPUID_C000_0001_EDX_PHE(1U << 10)
+/* PHE enabled */
+#define CPUID_C000_0001_EDX_PHE_EN (1U << 11)
+/* PadLock Montgomery Multiplier */
+#define CPUID_C000_0001_EDX_PMM(1U << 12)
+/* PMM enabled */
+#define CPUID_C000_0001_EDX_PMM_EN (1U << 13)
+
 #define CPUID_VENDOR_SZ  12
 
 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
-- 
2.34.1

[PATCH v2 4/4] target/i386: Update CMPLegacy handling for Zhaoxin CPUs

2024-07-04 Thread EwanHai

Zhaoxin CPUs handle the CMPLegacy bit in the same way
as Intel CPUs. This patch simplifies the existing logic by
using the IS_XXX_CPU macro and includes checks for Zhaoxin
vendor to align their behavior with Intel.

Signed-off-by: EwanHai 
---
 target/i386/cpu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a3747fc487..c52a4cf3ba 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6945,9 +6945,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  * So don't set it here for Intel to make Linux guests happy.
  */
 if (threads_per_pkg > 1) {
-if (env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1 ||
-env->cpuid_vendor2 != CPUID_VENDOR_INTEL_2 ||
-env->cpuid_vendor3 != CPUID_VENDOR_INTEL_3) {
+if (!IS_INTEL_CPU(env) && !IS_ZHAOXIN_CPU(env)) {
 *ecx |= 1 << 1;/* CmpLegacy bit */
 }
 }
-- 
2.34.1

[PATCH v2 1/4] target/i386: Add support for Zhaoxin CPU vendor identification

2024-07-04 Thread EwanHai

Zhaoxin currently uses two vendors: "Shanghai" and "Centaurhauls".
It is important to note that the latter now belongs to Zhaoxin. Therefore,
this patch replaces CPUID_VENDOR_VIA with CPUID_VENDOR_ZHAOXIN1.

The previous CPUID_VENDOR_VIA macro was only defined but never used in
QEMU, making this change straightforward.

Additionally, the IS_ZHAOXIN_CPU macro has been added to simplify the
checks for Zhaoxin CPUs.

Signed-off-by: EwanHai 
---
 target/i386/cpu.h | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c64ef0c1a2..07e8353f36 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1001,7 +1001,16 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord 
w,
 #define CPUID_VENDOR_AMD_3   0x444d4163 /* "cAMD" */
 #define CPUID_VENDOR_AMD   "AuthenticAMD"
 
-#define CPUID_VENDOR_VIA   "CentaurHauls"
+#define CPUID_VENDOR_ZHAOXIN1_1 0x746E6543 /* "Cent" */
+#define CPUID_VENDOR_ZHAOXIN1_2 0x48727561 /* "aurH" */
+#define CPUID_VENDOR_ZHAOXIN1_3 0x736C7561 /* "auls" */
+
+#define CPUID_VENDOR_ZHAOXIN2_1 0x68532020 /* "  Sh" */
+#define CPUID_VENDOR_ZHAOXIN2_2 0x68676E61 /* "angh" */
+#define CPUID_VENDOR_ZHAOXIN2_3 0x20206961 /* "ai  " */
+
+#define CPUID_VENDOR_ZHAOXIN1   "CentaurHauls"
+#define CPUID_VENDOR_ZHAOXIN2   "  Shanghai  "
 
 #define CPUID_VENDOR_HYGON"HygonGenuine"
 
@@ -1011,6 +1020,15 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord 
w,
 #define IS_AMD_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_AMD_1 && \
  (env)->cpuid_vendor2 == CPUID_VENDOR_AMD_2 && \
  (env)->cpuid_vendor3 == CPUID_VENDOR_AMD_3)
+#define IS_ZHAOXIN1_CPU(env) \
+((env)->cpuid_vendor1 == CPUID_VENDOR_ZHAOXIN1_1 && \
+ (env)->cpuid_vendor2 == CPUID_VENDOR_ZHAOXIN1_2 && \
+ (env)->cpuid_vendor3 == CPUID_VENDOR_ZHAOXIN1_3)
+#define IS_ZHAOXIN2_CPU(env) \
+((env)->cpuid_vendor1 == CPUID_VENDOR_ZHAOXIN2_1 && \
+ (env)->cpuid_vendor2 == CPUID_VENDOR_ZHAOXIN2_2 && \
+ (env)->cpuid_vendor3 == CPUID_VENDOR_ZHAOXIN2_3)
+#define IS_ZHAOXIN_CPU(env) (IS_ZHAOXIN1_CPU(env) || IS_ZHAOXIN2_CPU(env))
 
 #define CPUID_MWAIT_IBE (1U << 1) /* Interrupts can exit capability */
 #define CPUID_MWAIT_EMX (1U << 0) /* enumeration supported */
-- 
2.34.1

[PATCH v2 0/4] Add support for Zhaoxin Yongfeng CPU model and other improvements

2024-07-04 Thread EwanHai

### Summary of changes

EwanHai (4):
  target/i386: Add support for Zhaoxin CPU vendor identification
  target/i386: Add CPUID leaf 0xC000_0001 EDX definitions
  target/i386: Introduce Zhaoxin Yongfeng CPU model
  target/i386: Update CMPLegacy handling for Zhaoxin CPUs

 target/i386/cpu.c | 128 --
 target/i386/cpu.h |  41 ++-
 2 files changed, 165 insertions(+), 4 deletions(-)

### Changes since v1
1. Removed VIA-related information from the patch description to avoid
misunderstanding.
2. Replaced CPUID_VENDOR_VIA with CPUID_VENDOR_ZHAOXIN1 because the
"Centaurhauls" vendor ID now belongs to Zhaoxin.The previous CPUID_VENDOR_VIA
macro was only defined but never used in QEMU, making this change
straightforward.

v1 link: https://lore.kernel.org/qemu-devel/20240625091905.1325205-1-ewanhai-
o...@zhaoxin.com/

### Known Issues
1. Issue with VMX Preemption Timer Rate on Yongfeng CPU:
   - Description: On Yongfeng CPUs, the VMX preemption timer rate is 128,
 meaning that bits 4:0 of MSR_IA32_VMX_MISC_CTLS should be set to 7.
 However, due to Intel's rate being 5, the Linux kernel has hardcoded
 this value as 5: `#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5`.
   - Impact: This discrepancy can cause incorrect behavior in the VMX
 preemption timer on Yongfeng CPUs.
   - Workaround: A patch to correct this issue in the Linux kernel is
 currently being prepared and will be submitted soon.
-- 
2.34.1

[PATCH v2 3/4] target/i386: Introduce Zhaoxin Yongfeng CPU model

2024-07-04 Thread EwanHai

Introduce support for the Zhaoxin Yongfeng CPU model.
The Zhaoxin Yongfeng CPU is Zhaoxin's latest server CPU.

This new cpu model ensure that QEMU can correctly emulate the Zhaoxin
Yongfeng CPU, providing accurate functionality and performance characteristics.

Signed-off-by: EwanHai 
---
 target/i386/cpu.c | 124 ++
 1 file changed, 124 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 914bef442c..a3747fc487 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5211,6 +5211,130 @@ static const X86CPUDefinition builtin_x86_defs[] = {
 .model_id = "AMD EPYC-Genoa Processor",
 .cache_info = &epyc_genoa_cache_info,
 },
+{
+.name = "YongFeng",
+.level = 0x1F,
+.vendor = CPUID_VENDOR_ZHAOXIN1,
+.family = 7,
+.model = 11,
+.stepping = 3,
+/* missing: CPUID_HT, CPUID_TM, CPUID_PBE */
+.features[FEAT_1_EDX] =
+CPUID_SS | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+CPUID_ACPI | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV |
+CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC |
+CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC |
+CPUID_PSE | CPUID_DE | CPUID_VME | CPUID_FP87,
+/*
+ * missing: CPUID_EXT_OSXSAVE, CPUID_EXT_XTPR, CPUID_EXT_TM2,
+ * CPUID_EXT_EST, CPUID_EXT_SMX, CPUID_EXT_VMX
+ */
+.features[FEAT_1_ECX] =
+CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX |
+CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_TSC_DEADLINE_TIMER |
+CPUID_EXT_POPCNT | CPUID_EXT_MOVBE | CPUID_EXT_X2APIC |
+CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_PCID |
+CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 |
+CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
+.features[FEAT_7_0_EBX] =
+CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_ADX |
+CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_BMI2 |
+CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_BMI1 |
+CPUID_7_0_EBX_FSGSBASE,
+/* missing: CPUID_7_0_ECX_OSPKE */
+.features[FEAT_7_0_ECX] =
+CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_UMIP,
+.features[FEAT_7_0_EDX] =
+CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL,
+.features[FEAT_8000_0001_EDX] =
+CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB |
+CPUID_EXT2_NX | CPUID_EXT2_SYSCALL,
+.features[FEAT_8000_0001_ECX] =
+CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM,
+.features[FEAT_8000_0007_EDX] = CPUID_APM_INVTSC,
+/*
+ * TODO: When the Linux kernel introduces other existing definitions
+ * for this leaf, remember to update the definitions here.
+ */
+.features[FEAT_C000_0001_EDX] =
+CPUID_C000_0001_EDX_PMM_EN | CPUID_C000_0001_EDX_PMM |
+CPUID_C000_0001_EDX_PHE_EN | CPUID_C000_0001_EDX_PHE |
+CPUID_C000_0001_EDX_ACE2 |
+CPUID_C000_0001_EDX_XCRYPT_EN | CPUID_C000_0001_EDX_XCRYPT |
+CPUID_C000_0001_EDX_XSTORE_EN | CPUID_C000_0001_EDX_XSTORE,
+.features[FEAT_XSAVE] =
+CPUID_XSAVE_XSAVEOPT,
+.features[FEAT_ARCH_CAPABILITIES] =
+MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY |
+MSR_ARCH_CAP_MDS_NO | MSR_ARCH_CAP_PSCHANGE_MC_NO |
+MSR_ARCH_CAP_SSB_NO,
+.features[FEAT_VMX_PROCBASED_CTLS] =
+VMX_CPU_BASED_VIRTUAL_INTR_PENDING | VMX_CPU_BASED_HLT_EXITING |
+VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_INVLPG_EXITING |
+VMX_CPU_BASED_MWAIT_EXITING | VMX_CPU_BASED_RDPMC_EXITING |
+VMX_CPU_BASED_RDTSC_EXITING | VMX_CPU_BASED_CR3_LOAD_EXITING |
+VMX_CPU_BASED_CR3_STORE_EXITING | VMX_CPU_BASED_CR8_LOAD_EXITING |
+VMX_CPU_BASED_CR8_STORE_EXITING | VMX_CPU_BASED_TPR_SHADOW |
+VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_MOV_DR_EXITING |
+VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS |
+VMX_CPU_BASED_MONITOR_TRAP_FLAG | VMX_CPU_BASED_USE_MSR_BITMAPS |
+VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING |
+VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
+/*
+ * missing: VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING,
+ * VMX_SECONDARY_EXEC_TSC_SCALING
+ */
+.features[FEAT_VMX_SECONDARY_CTLS] =
+VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
+VMX_SECONDARY_EXEC_RDTSCP | VMX_SECONDARY_EXEC_ENABLE_VPID |
+VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+VMX_SECONDARY_EXEC_WBINV

1 2 3 4 >

1 - 100 of 319 matches

Mail list logo