[PATCH v5 04/22] target/riscv: Create xl field in env

2021-11-25 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
---
 target/riscv/cpu.c| 1 +
 target/riscv/cpu.h| 3 +++
 target/riscv/cpu_helper.c | 3 ++-
 target/riscv/csr.c| 2 ++
 target/riscv/machine.c| 5 +++--
 5 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f812998123..5c757ce33a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -377,6 +377,7 @@ static void riscv_cpu_reset(DeviceState *dev)
 /* mmte is supposed to have pm.current hardwired to 1 */
 env->mmte |= (PM_EXT_INITIAL | MMTE_M_PM_CURRENT);
 #endif
+env->xl = riscv_cpu_mxl(env);
 cs->exception_index = RISCV_EXCP_NONE;
 env->load_res = -1;
 set_default_nan_mode(1, &env->fp_status);
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 0760c0af93..412339dbad 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -138,6 +138,7 @@ struct CPURISCVState {
 uint32_t misa_mxl_max;  /* max mxl for this cpu */
 uint32_t misa_ext;  /* current extensions */
 uint32_t misa_ext_mask; /* max ext for this cpu */
+uint32_t xl;/* current xlen */
 
 uint32_t features;
 
@@ -420,6 +421,8 @@ static inline RISCVMXL riscv_cpu_mxl(CPURISCVState *env)
 }
 #endif
 
+RISCVMXL cpu_get_xl(CPURISCVState *env);
+
 /*
  * A simplification for VLMAX
  * = (1 << LMUL) * VLEN / (8 * (1 << SEW))
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 9eeed38c7e..b6cddf8648 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -35,7 +35,7 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 #endif
 }
 
-static RISCVMXL cpu_get_xl(CPURISCVState *env)
+RISCVMXL cpu_get_xl(CPURISCVState *env)
 {
 #if defined(TARGET_RISCV32)
 return MXL_RV32;
@@ -330,6 +330,7 @@ void riscv_cpu_set_mode(CPURISCVState *env, target_ulong 
newpriv)
 }
 /* tlb_flush is unnecessary as mode is contained in mmu_idx */
 env->priv = newpriv;
+env->xl = cpu_get_xl(env);
 
 /*
  * Clear the load reservation - otherwise a reservation placed in one
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ce20c3a970..d4ee897be2 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -553,6 +553,7 @@ static RISCVException write_mstatus(CPURISCVState *env, int 
csrno,
 mstatus = set_field(mstatus, MSTATUS64_UXL, MXL_RV64);
 }
 env->mstatus = mstatus;
+env->xl = cpu_get_xl(env);
 
 return RISCV_EXCP_NONE;
 }
@@ -654,6 +655,7 @@ static RISCVException write_misa(CPURISCVState *env, int 
csrno,
 /* flush translation cache */
 tb_flush(env_cpu(env));
 env->misa_ext = val;
+env->xl = riscv_cpu_mxl(env);
 return RISCV_EXCP_NONE;
 }
 
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index ad8248ebfd..08ed603626 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -166,8 +166,8 @@ static const VMStateDescription vmstate_pointermasking = {
 
 const VMStateDescription vmstate_riscv_cpu = {
 .name = "cpu",
-.version_id = 3,
-.minimum_version_id = 3,
+.version_id = 4,
+.minimum_version_id = 4,
 .fields = (VMStateField[]) {
 VMSTATE_UINTTL_ARRAY(env.gpr, RISCVCPU, 32),
 VMSTATE_UINT64_ARRAY(env.fpr, RISCVCPU, 32),
@@ -183,6 +183,7 @@ const VMStateDescription vmstate_riscv_cpu = {
 VMSTATE_UINT32(env.misa_ext, RISCVCPU),
 VMSTATE_UINT32(env.misa_mxl_max, RISCVCPU),
 VMSTATE_UINT32(env.misa_ext_mask, RISCVCPU),
+VMSTATE_UINT32(env.xl, RISCVCPU),
 VMSTATE_UINT32(env.features, RISCVCPU),
 VMSTATE_UINTTL(env.priv, RISCVCPU),
 VMSTATE_UINTTL(env.virt, RISCVCPU),
-- 
2.25.1




[PATCH v5 08/22] target/riscv: Relax debug check for pm write

2021-11-25 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/csr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index d4ee897be2..bfafd3bd96 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1465,6 +1465,9 @@ static bool check_pm_current_disabled(CPURISCVState *env, 
int csrno)
 int csr_priv = get_field(csrno, 0x300);
 int pm_current;
 
+if (env->debugger) {
+return false;
+}
 /*
  * If priv lvls differ that means we're accessing csr from higher priv lvl,
  * so allow the access
-- 
2.25.1




[PATCH v5 18/22] target/riscv: Adjust vector atomic check with XLEN

2021-11-25 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 17ee3babef..aacb97d280 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -739,7 +739,8 @@ static bool amo_check(DisasContext *s, arg_rwdvm* a)
 (!a->wd || vext_check_overlap_mask(s, a->rd, a->vm, false)) &&
 vext_check_reg(s, a->rd, false) &&
 vext_check_reg(s, a->rs2, false) &&
-((1 << s->sew) <= sizeof(target_ulong)) &&
+/* TODO: RV128 could allow 128-bit atomics */
+((1 << s->sew) <=  (get_xl(s) == MXL_RV32 ? 4 : 8)) &&
 ((1 << s->sew) >= 4));
 }
 
-- 
2.25.1




[PATCH v5 17/22] target/riscv: Remove VILL field in VTYPE

2021-11-25 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
Acked-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index ef1d87af38..347428ffdc 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -106,7 +106,6 @@ FIELD(VTYPE, VLMUL, 0, 2)
 FIELD(VTYPE, VSEW, 2, 3)
 FIELD(VTYPE, VEDIV, 5, 2)
 FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 8)
-FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 1, 1)
 
 struct CPURISCVState {
 target_ulong gpr[32];
-- 
2.25.1




[PATCH v5 11/22] target/riscv: Alloc tcg global for cur_pm[mask|base]

2021-11-25 Thread LIU Zhiwei
Replace the array of pm_mask/pm_base with scalar variables.
Remove the cached array value in DisasContext.

Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/translate.c | 32 
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index a6a73ced9e..6cb74c6355 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -37,8 +37,8 @@ static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
 static TCGv load_res;
 static TCGv load_val;
 /* globals for PM CSRs */
-static TCGv pm_mask[4];
-static TCGv pm_base[4];
+static TCGv pm_mask;
+static TCGv pm_base;
 
 #include "exec/gen-icount.h"
 
@@ -88,8 +88,6 @@ typedef struct DisasContext {
 TCGv temp[4];
 /* PointerMasking extension */
 bool pm_enabled;
-TCGv pm_mask;
-TCGv pm_base;
 } DisasContext;
 
 static inline bool has_ext(DisasContext *ctx, uint32_t ext)
@@ -297,8 +295,8 @@ static TCGv gen_pm_adjust_address(DisasContext *s, TCGv src)
 return src;
 } else {
 temp = temp_new(s);
-tcg_gen_andc_tl(temp, src, s->pm_mask);
-tcg_gen_or_tl(temp, temp, s->pm_base);
+tcg_gen_andc_tl(temp, src, pm_mask);
+tcg_gen_or_tl(temp, temp, pm_base);
 return temp;
 }
 }
@@ -647,10 +645,6 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->ntemp = 0;
 memset(ctx->temp, 0, sizeof(ctx->temp));
 ctx->pm_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_ENABLED);
-int priv = tb_flags & TB_FLAGS_PRIV_MMU_MASK;
-ctx->pm_mask = pm_mask[priv];
-ctx->pm_base = pm_base[priv];
-
 ctx->zero = tcg_constant_tl(0);
 }
 
@@ -763,19 +757,9 @@ void riscv_translate_init(void)
  "load_res");
 load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val),
  "load_val");
-#ifndef CONFIG_USER_ONLY
 /* Assign PM CSRs to tcg globals */
-pm_mask[PRV_U] =
-  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, upmmask), "upmmask");
-pm_base[PRV_U] =
-  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, upmbase), "upmbase");
-pm_mask[PRV_S] =
-  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, spmmask), "spmmask");
-pm_base[PRV_S] =
-  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, spmbase), "spmbase");
-pm_mask[PRV_M] =
-  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, mpmmask), "mpmmask");
-pm_base[PRV_M] =
-  tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, mpmbase), "mpmbase");
-#endif
+pm_mask = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, cur_pmmask),
+ "pmmask");
+pm_base = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, cur_pmbase),
+ "pmbase");
 }
-- 
2.25.1




[PATCH v5 20/22] target/riscv: Adjust vector address with mask

2021-11-25 Thread LIU Zhiwei
The mask comes from the pointer masking extension, or the max value
corresponding to XLEN bits.

Signed-off-by: LIU Zhiwei 
Acked-by: Alistair Francis 
---
 target/riscv/vector_helper.c | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e3ac70da01..c3976cc3d4 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -116,6 +116,11 @@ static inline uint32_t vext_maxsz(uint32_t desc)
 return simd_maxsz(desc) << vext_lmul(desc);
 }
 
+static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
+{
+return (addr & env->cur_pmmask) | env->cur_pmbase;
+}
+
 /*
  * This function checks watchpoint before real load operation.
  *
@@ -133,12 +138,12 @@ static void probe_pages(CPURISCVState *env, target_ulong 
addr,
 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
 target_ulong curlen = MIN(pagelen, len);
 
-probe_access(env, addr, curlen, access_type,
+probe_access(env, adjust_addr(env, addr), curlen, access_type,
  cpu_mmu_index(env, false), ra);
 if (len > curlen) {
 addr += curlen;
 curlen = len - curlen;
-probe_access(env, addr, curlen, access_type,
+probe_access(env, adjust_addr(env, addr), curlen, access_type,
  cpu_mmu_index(env, false), ra);
 }
 }
@@ -299,7 +304,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
 }
 while (k < nf) {
 target_ulong addr = base + stride * i + k * msz;
-ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ldst_elem(env, adjust_addr(env, addr), i + k * vlmax, vd, ra);
 k++;
 }
 }
@@ -392,7 +397,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState 
*env, uint32_t desc,
 k = 0;
 while (k < nf) {
 target_ulong addr = base + (i * nf + k) * msz;
-ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ldst_elem(env, adjust_addr(env, addr), i + k * vlmax, vd, ra);
 k++;
 }
 }
@@ -529,7 +534,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
 }
 while (k < nf) {
 abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
-ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ldst_elem(env, adjust_addr(env, addr), i + k * vlmax, vd, ra);
 k++;
 }
 }
@@ -619,7 +624,7 @@ vext_ldff(void *vd, void *v0, target_ulong base,
 if (!vm && !vext_elem_mask(v0, mlen, i)) {
 continue;
 }
-addr = base + nf * i * msz;
+addr = adjust_addr(env, base + nf * i * msz);
 if (i == 0) {
 probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
 } else {
@@ -646,7 +651,7 @@ vext_ldff(void *vd, void *v0, target_ulong base,
 break;
 }
 remain -= offset;
-addr += offset;
+addr = adjust_addr(env, addr + offset);
 }
 }
 }
@@ -662,7 +667,7 @@ ProbeSuccess:
 }
 while (k < nf) {
 target_ulong addr = base + (i * nf + k) * msz;
-ldst_elem(env, addr, i + k * vlmax, vd, ra);
+ldst_elem(env, adjust_addr(env, addr), i + k * vlmax, vd, ra);
 k++;
 }
 }
@@ -801,7 +806,7 @@ vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
 continue;
 }
 addr = get_index_addr(base, i, vs2);
-noatomic_op(vs3, addr, wd, i, env, ra);
+noatomic_op(vs3, adjust_addr(env, addr), wd, i, env, ra);
 }
 clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
 }
-- 
2.25.1




[PATCH v5 13/22] target/riscv: Split pm_enabled into mask and base

2021-11-25 Thread LIU Zhiwei
Use cached cur_pmmask and cur_pmbase to infer the
current PM mode.

This may decrease the TCG IR by one when pm_enabled
is true and pm_base_enabled is false.

Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.h|  3 ++-
 target/riscv/cpu_helper.c | 24 ++--
 target/riscv/translate.c  | 12 
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 28006b782f..3986a2164d 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -412,7 +412,8 @@ FIELD(TB_FLAGS, MSTATUS_HS_FS, 11, 2)
 /* The combination of MXL/SXL/UXL that applies to the current cpu mode. */
 FIELD(TB_FLAGS, XL, 13, 2)
 /* If PointerMasking should be applied */
-FIELD(TB_FLAGS, PM_ENABLED, 15, 1)
+FIELD(TB_FLAGS, PM_MASK_ENABLED, 15, 1)
+FIELD(TB_FLAGS, PM_BASE_ENABLED, 16, 1)
 
 #ifdef TARGET_RISCV32
 #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index d74199b49b..91b84be48f 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -107,27 +107,15 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, 
target_ulong *pc,
 flags = FIELD_DP32(flags, TB_FLAGS, MSTATUS_HS_FS,
get_field(env->mstatus_hs, MSTATUS_FS));
 }
-if (riscv_has_ext(env, RVJ)) {
-int priv = flags & TB_FLAGS_PRIV_MMU_MASK;
-bool pm_enabled = false;
-switch (priv) {
-case PRV_U:
-pm_enabled = env->mmte & U_PM_ENABLE;
-break;
-case PRV_S:
-pm_enabled = env->mmte & S_PM_ENABLE;
-break;
-case PRV_M:
-pm_enabled = env->mmte & M_PM_ENABLE;
-break;
-default:
-g_assert_not_reached();
-}
-flags = FIELD_DP32(flags, TB_FLAGS, PM_ENABLED, pm_enabled);
-}
 #endif
 
 flags = FIELD_DP32(flags, TB_FLAGS, XL, env->xl);
+if (env->cur_pmmask < (env->xl == MXL_RV32 ? UINT32_MAX : UINT64_MAX)) {
+flags = FIELD_DP32(flags, TB_FLAGS, PM_MASK_ENABLED, 1);
+}
+if (env->cur_pmbase != 0) {
+flags = FIELD_DP32(flags, TB_FLAGS, PM_BASE_ENABLED, 1);
+}
 
 *pflags = flags;
 }
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index fd75f7c4bc..10c16e759d 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -87,7 +87,8 @@ typedef struct DisasContext {
 /* Space for 3 operands plus 1 extra for address computation. */
 TCGv temp[4];
 /* PointerMasking extension */
-bool pm_enabled;
+bool pm_mask_enabled;
+bool pm_base_enabled;
 } DisasContext;
 
 static inline bool has_ext(DisasContext *ctx, uint32_t ext)
@@ -291,12 +292,14 @@ static TCGv get_address(DisasContext *ctx, int rs1, int 
imm)
 TCGv src1 = get_gpr(ctx, rs1, EXT_NONE);
 
 tcg_gen_addi_tl(addr, src1, imm);
-if (ctx->pm_enabled) {
+if (ctx->pm_mask_enabled) {
 tcg_gen_and_tl(addr, addr, pm_mask);
-tcg_gen_or_tl(addr, addr, pm_base);
 } else if (get_xl(ctx) == MXL_RV32) {
 tcg_gen_ext32u_tl(addr, addr);
 }
+if (ctx->pm_base_enabled) {
+tcg_gen_or_tl(addr, addr, pm_base);
+}
 return addr;
 }
 
@@ -643,7 +646,8 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->cs = cs;
 ctx->ntemp = 0;
 memset(ctx->temp, 0, sizeof(ctx->temp));
-ctx->pm_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_ENABLED);
+ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
+ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
 ctx->zero = tcg_constant_tl(0);
 }
 
-- 
2.25.1




[PATCH v5 14/22] target/riscv: Split out the vill from vtype

2021-11-25 Thread LIU Zhiwei
With vill held in its own field, we no longer need to specially process
vtype when XLEN changes.

Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.h   |  1 +
 target/riscv/cpu_helper.c|  3 +--
 target/riscv/csr.c   | 13 -
 target/riscv/machine.c   |  5 +++--
 target/riscv/vector_helper.c |  3 ++-
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 3986a2164d..1e56405243 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -119,6 +119,7 @@ struct CPURISCVState {
 target_ulong vl;
 target_ulong vstart;
 target_ulong vtype;
+bool vill;
 
 target_ulong pc;
 target_ulong load_res;
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 91b84be48f..7945f687b4 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -77,8 +77,7 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
*pc,
 if (riscv_has_ext(env, RVV)) {
 uint32_t vlmax = vext_get_vlmax(env_archcpu(env), env->vtype);
 bool vl_eq_vlmax = (env->vstart == 0) && (vlmax == env->vl);
-flags = FIELD_DP32(flags, TB_FLAGS, VILL,
-FIELD_EX64(env->vtype, VTYPE, VILL));
+flags = FIELD_DP32(flags, TB_FLAGS, VILL, env->vill);
 flags = FIELD_DP32(flags, TB_FLAGS, SEW,
 FIELD_EX64(env->vtype, VTYPE, VSEW));
 flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 68da5d2173..3f284090fc 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -286,7 +286,18 @@ static RISCVException write_fcsr(CPURISCVState *env, int 
csrno,
 static RISCVException read_vtype(CPURISCVState *env, int csrno,
  target_ulong *val)
 {
-*val = env->vtype;
+uint64_t vill;
+switch (env->xl) {
+case MXL_RV32:
+vill = (uint32_t)env->vill << 31;
+break;
+case MXL_RV64:
+vill = (uint64_t)env->vill << 63;
+break;
+default:
+g_assert_not_reached();
+}
+*val = (target_ulong)vill | env->vtype;
 return RISCV_EXCP_NONE;
 }
 
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index 10a0f34f37..cf71a5ad33 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -124,8 +124,8 @@ static bool vector_needed(void *opaque)
 
 static const VMStateDescription vmstate_vector = {
 .name = "cpu/vector",
-.version_id = 1,
-.minimum_version_id = 1,
+.version_id = 2,
+.minimum_version_id = 2,
 .needed = vector_needed,
 .fields = (VMStateField[]) {
 VMSTATE_UINT64_ARRAY(env.vreg, RISCVCPU, 32 * RV_VLEN_MAX / 64),
@@ -134,6 +134,7 @@ static const VMStateDescription vmstate_vector = {
 VMSTATE_UINTTL(env.vl, RISCVCPU),
 VMSTATE_UINTTL(env.vstart, RISCVCPU),
 VMSTATE_UINTTL(env.vtype, RISCVCPU),
+VMSTATE_BOOL(env.vill, RISCVCPU),
 VMSTATE_END_OF_LIST()
 }
 };
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 12c31aa4b4..b02ccefa4d 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -38,7 +38,8 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong 
s1,
 
 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
 /* only set vill bit. */
-env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
+env->vill = 1;
+env->vtype = 0;
 env->vl = 0;
 env->vstart = 0;
 return 0;
-- 
2.25.1




[PATCH v5 22/22] target/riscv: Enable uxl field write

2021-11-25 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu_bits.h | 2 ++
 target/riscv/csr.c  | 8 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 9913fa9f77..5106f0e769 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -413,6 +413,8 @@ typedef enum {
 #define SSTATUS_SUM 0x0004 /* since: priv-1.10 */
 #define SSTATUS_MXR 0x0008
 
+#define SSTATUS64_UXL   0x0003ULL
+
 #define SSTATUS32_SD0x8000
 #define SSTATUS64_SD0x8000ULL
 
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 3f284090fc..735d9a7825 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -553,15 +553,14 @@ static RISCVException write_mstatus(CPURISCVState *env, 
int csrno,
  * RV32: MPV and GVA are not in mstatus. The current plan is to
  * add them to mstatush. For now, we just don't support it.
  */
-mask |= MSTATUS_MPV | MSTATUS_GVA;
+mask |= MSTATUS_MPV | MSTATUS_GVA | MSTATUS64_UXL;
 }
 
 mstatus = (mstatus & ~mask) | (val & mask);
 
 if (riscv_cpu_mxl(env) == MXL_RV64) {
-/* SXL and UXL fields are for now read only */
+/* SXL fields are for now read only */
 mstatus = set_field(mstatus, MSTATUS64_SXL, MXL_RV64);
-mstatus = set_field(mstatus, MSTATUS64_UXL, MXL_RV64);
 }
 env->mstatus = mstatus;
 env->xl = cpu_get_xl(env);
@@ -842,6 +841,9 @@ static RISCVException write_sstatus(CPURISCVState *env, int 
csrno,
 target_ulong val)
 {
 target_ulong mask = (sstatus_v1_10_mask);
+if (env->xl != MXL_RV32) {
+mask |= SSTATUS64_UXL;
+}
 target_ulong newval = (env->mstatus & ~mask) | (val & mask);
 return write_mstatus(env, CSR_MSTATUS, newval);
 }
-- 
2.25.1




[PATCH v5 16/22] target/riscv: Adjust vsetvl according to XLEN

2021-11-25 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.h   | 5 +
 target/riscv/vector_helper.c | 7 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 2cee98f152..ef1d87af38 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -427,6 +427,11 @@ static inline RISCVMXL riscv_cpu_mxl(CPURISCVState *env)
 
 RISCVMXL cpu_get_xl(CPURISCVState *env);
 
+static inline int riscv_cpu_xlen(CPURISCVState *env)
+{
+return 16 << env->xl;
+}
+
 /*
  * A simplification for VLMAX
  * = (1 << LMUL) * VLEN / (8 * (1 << SEW))
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index b02ccefa4d..e9d49cf105 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -33,8 +33,11 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong 
s1,
 RISCVCPU *cpu = env_archcpu(env);
 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
-bool vill = FIELD_EX64(s2, VTYPE, VILL);
-target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
+int xlen = riscv_cpu_xlen(env);
+bool vill = (s2 >> (xlen - 1)) & 0x1;
+target_ulong reserved = s2 &
+MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
+xlen - 1 - R_VTYPE_RESERVED_SHIFT);
 
 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
 /* only set vill bit. */
-- 
2.25.1




[PATCH v5 15/22] target/riscv: Fix RESERVED field length in VTYPE

2021-11-25 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 1e56405243..2cee98f152 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -105,7 +105,7 @@ typedef struct CPURISCVState CPURISCVState;
 FIELD(VTYPE, VLMUL, 0, 2)
 FIELD(VTYPE, VSEW, 2, 3)
 FIELD(VTYPE, VEDIV, 5, 2)
-FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 9)
+FIELD(VTYPE, RESERVED, 7, sizeof(target_ulong) * 8 - 8)
 FIELD(VTYPE, VILL, sizeof(target_ulong) * 8 - 1, 1)
 
 struct CPURISCVState {
-- 
2.25.1




[PATCH v5 19/22] target/riscv: Fix check range for first fault only

2021-11-25 Thread LIU Zhiwei
Only check the range that has passed the address translation.

Signed-off-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/vector_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e9d49cf105..e3ac70da01 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -631,12 +631,12 @@ vext_ldff(void *vd, void *v0, target_ulong base,
  cpu_mmu_index(env, false));
 if (host) {
 #ifdef CONFIG_USER_ONLY
-if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
+if (page_check_range(addr, offset, PAGE_READ) < 0) {
 vl = i;
 goto ProbeSuccess;
 }
 #else
-probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
+probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
 #endif
 } else {
 vl = i;
-- 
2.25.1




[PATCH v5 21/22] target/riscv: Adjust scalar reg in vector with XLEN

2021-11-25 Thread LIU Zhiwei
When sew <= 32 bits, there is no need to extend the scalar register.
When sew > 32 bits, if xlen is less than sew, we should sign-extend
the scalar register, except where explicitly specified by the spec.

Signed-off-by: LIU Zhiwei 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 44 ++---
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index aacb97d280..411b5414b2 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -833,7 +833,7 @@ typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, 
TCGv_ptr,
   TCGv_env, TCGv_i32);
 
 static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
-gen_helper_opivx *fn, DisasContext *s)
+gen_helper_opivx *fn, DisasContext *s, DisasExtend ext)
 {
 TCGv_ptr dest, src2, mask;
 TCGv src1;
@@ -846,7 +846,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t 
vs2, uint32_t vm,
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
 src2 = tcg_temp_new_ptr();
-src1 = get_gpr(s, rs1, EXT_NONE);
+src1 = get_gpr(s, rs1, ext);
 
 data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
 data = FIELD_DP32(data, VDATA, VM, vm);
@@ -895,7 +895,7 @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn 
*gvec_fn,
 tcg_temp_free_i64(src1);
 return true;
 }
-return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
+return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, EXT_SIGN);
 }
 
 /* OPIVX with GVEC IR */
@@ -1128,7 +1128,7 @@ static bool do_opivx_widen(DisasContext *s, arg_rmrr *a,
gen_helper_opivx *fn)
 {
 if (opivx_widen_check(s, a)) {
-return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
+return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, EXT_SIGN);
 }
 return false;
 }
@@ -1213,7 +1213,7 @@ static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a,
gen_helper_opivx *fn)
 {
 if (opiwx_widen_check(s, a)) {
-return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
+return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, EXT_SIGN);
 }
 return false;
 }
@@ -1312,7 +1312,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 gen_helper_##NAME##_w, gen_helper_##NAME##_d,\
 };   \
  \
-return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
+return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \
+   fns[s->sew], s, EXT_SIGN);\
 }\
 return false;\
 }
@@ -1386,7 +1387,7 @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, 
GVecGen2sFn32 *gvec_fn,
 tcg_temp_free_i32(src1);
 return true;
 }
-return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
+return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, EXT_SIGN);
 }
 
 #define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \
@@ -1472,7 +1473,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 gen_helper_##NAME##_h,   \
 gen_helper_##NAME##_w,   \
 };   \
-return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
+return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \
+   fns[s->sew], s, EXT_SIGN);\
 }\
 return false;\
 }
@@ -2670,6 +2672,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
 /* This instruction ignores LMUL and vector register groups */
 int maxsz = s->vlen >> 3;
 TCGv_i64 t1;
+TCGv src1 = get_gpr(s, a->rs1, EXT_ZERO);
 TCGLabel *over = gen_new_label();
 
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
@@ -2679,7 +2682,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
 }
 
 t1 = tcg_temp_new_i64();
-tcg_gen_extu_tl_i64(t1, cpu_gpr[a->rs1]);
+tcg_gen_extu_tl_i64(t1, src1);
 vec_element_storei(s, a->rd, 0, t1);
 tcg_temp_free_i64(t1);
 done:
@@ -2748,12 +2751,28 @@ static bool slideup_check(DisasContext *s, arg_rmrr *a)
 (a->rd != a->rs2));
 }
 
+/* OPIVXU without GVEC IR */
+#define GEN_OPIVXU_TRANS(NAME, CHECK)\

Re: [RFC PATCH 0/2] QEMU/openbios: PPC Software TLB support in the G4 family

2021-11-25 Thread Cédric Le Goater

On 11/25/21 01:45, BALATON Zoltan wrote:

On Wed, 24 Nov 2021, Fabiano Rosas wrote:

Fabiano Rosas  writes:


Hi all,

We have this bug in QEMU which indicates that we haven't been able to
run openbios on a 7450 cpu for quite a long time:

https://gitlab.com/qemu-project/qemu/-/issues/86

OK:
  $ ./qemu-system-ppc -serial mon:stdio -nographic -cpu 7410

 >> =
 >> OpenBIOS 1.1 [Nov 1 2021 20:36]
  ...

NOK:
  $ ./qemu-system-ppc -serial mon:stdio -nographic -cpu 7450 -d int


This CPU appears in PowerMac G4 so maybe better use -machine mac99,via=pmu with 
it as it's strange to put it in a g3beige but that may not matter for 
reproducing the problem.

As for guests, those running on the said PowerMac G4 should have support for these CPUs so 
maybe you can try some Mac OS X versions (or maybe MorphOS but that is not the best for 
debugging as there's no source available nor any help from its owners but just to see if it 
boots it may be sufficient, it should work on real PowerMac G4). According to 
 this CPU was used in 
 and it runs up 
to Mac OS 10.4.11. (Although OpenBIOS sets the device tree according to a PowerMac3,1 so 
not sure it's entirely correct for the PowerMac3,5 that has a 7450 CPU and if it matters 
for Mac OS X.)

I asked about this before but got no reply back then:
https://lists.nongnu.org/archive/html/qemu-ppc/2020-03/msg00292.html

This was because pegasos2 should have 7447 but it did not work so currently 
I've set it to 7400 which also works. The original board firmware had some 
problem detecting it but I think that only results in wrong CPU speed shown 
which is only a cosmetic problem, otherwise it seems to work. Since pegasos2 
does not use OpenBIOS but either VOF or the board's original firmware it may be 
an alternative way to test at least 7447 which the firmware and guests running 
on that board should work with. At least Debian 8.11 powerpc version had 
support for pegasos2 and should boot, I'm not sure newer versions still work. 
More info on pegasos2 can be found at:
http://zero.eik.bme.hu/~balaton/qemu/amiga/#morphos and
https://osdn.net/projects/qmiga/wiki/SubprojectPegasos2


It would be nice to add some documentation for these machines under:

  https://qemu.readthedocs.io/en/latest/system/target-ppc.html

Thanks

C.



I don't remember what problem I had with 7447 but if it does not work with 
pegasos2 then maybe there's some other problem with it too. I think it was 
maybe related to TLBs but I don't know and had no time to try again so I could 
be entirely wrong about this.

Regards,
BALATON Zoltan


  Raise exception at fff08cc4 => 004e (00)
  QEMU: Terminated

The actual issue is straightforward. There is a non-architected
feature that QEMU has enabled by default that openbios doesn't know
about. From the user manual:

"The MPC7540 has a set of implementation-specific registers,
exceptions, and instructions that facilitate very efficient software
searching of the page tables in memory for when software table
searching is enabled (HID0[STEN] = 1). This section describes those
resources and provides three example code sequences that can be used
in a MPC7540 system for an efficient search of the translation tables
in software. These three code sequences can be used as handlers for
the three exceptions requiring access to the PTEs in the page tables
in memory in this case-instruction TLB miss, data TLB miss on load,
and data TLB miss on store exceptions."

The current state:

1) QEMU does not check HID0[STEN] and makes the feature always enabled
by setting these cpus with the POWERPC_MMU_SOFT_74xx MMU model,
instead of the generic POWERPC_MMU_32B.

2) openbios does not recognize the PVRs for those cpus and also does
not have any handlers for the software TLB exceptions (vectors 0x1000,
0x1100, 0x1200).

Some assumptions (correct me if I'm wrong please):

- openbios is the only firmware we use for the following cpus: 7441,
7445, 7450, 7451, 7455, 7457, 7447, 7447a, 7448.
- without openbios, we cannot have a guest running on these cpus.

So to bring 7450 back to life we would need to either:

a) find another firmware/guest OS code that supports the feature;

b) implement the switching of the feature in QEMU and have the guest
code enable it only when supported. That would take some fiddling with
the MMU code to: merge POWERPC_MMU_SOFT_74xx into POWERPC_MMU_32B,
check the HID0[STEN] bit, figure out how to switch from HW TLB miss to
SW TLB miss on demand, block access to the TLBMISS register (and
others) when the feature is off, and so on;

c) leave the feature enabled in QEMU and implement the software TLB
miss handlers in openbios. The UM provides sample code, so this is
easy;

d) remove support for software TLB search for the 7450 family and
switch the cpus to the POWERPC_MMU_32B model.

Re: [RFC PATCH v2 21/30] hw/intc: Add LoongArch extioi interrupt controller(EIOINTC)

2021-11-25 Thread yangxiaojuan
Hi Mark,

On 11/11/2021 10:49 PM, Mark Cave-Ayland wrote:
> On 11/11/2021 01:35, Xiaojuan Yang wrote:
> 
>> This patch realize the EIOINTC interrupt controller.
>>
>> Signed-off-by: Xiaojuan Yang 
>> Signed-off-by: Song Gao 
>> ---
>>   hw/intc/Kconfig|   3 +
>>   hw/intc/loongarch_extioi.c | 570 +
>>   hw/intc/meson.build|   1 +
>>   hw/loongarch/Kconfig   |   1 +
>>   include/hw/intc/loongarch_extioi.h |  99 +
>>   include/hw/loongarch/loongarch.h   |   1 +
>>   6 files changed, 675 insertions(+)
>>   create mode 100644 hw/intc/loongarch_extioi.c
>>   create mode 100644 include/hw/intc/loongarch_extioi.h
>>
>> diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig
>> index c0dc12dfa0..a2d9efd5aa 100644
>> --- a/hw/intc/Kconfig
>> +++ b/hw/intc/Kconfig
>> @@ -82,3 +82,6 @@ config LOONGARCH_PCH_MSI
>>   select MSI_NONBROKEN
>>   bool
>>   select UNIMP
>> +
>> +config LOONGARCH_EXTIOI
>> +bool
>> diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
>> new file mode 100644
>> index 00..592cd8d1e2
>> --- /dev/null
>> +++ b/hw/intc/loongarch_extioi.c
>> @@ -0,0 +1,570 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +/*
>> + * Loongson 3A5000 ext interrupt controller emulation
>> + *
>> + * Copyright (C) 2021 Loongson Technology Corporation Limited
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/module.h"
>> +#include "qemu/log.h"
>> +#include "hw/irq.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/loongarch/loongarch.h"
>> +#include "hw/qdev-properties.h"
>> +#include "exec/address-spaces.h"
>> +#include "hw/intc/loongarch_extioi.h"
>> +#include "migration/vmstate.h"
>> +
>> +#define DEBUG_APIC 0
>> +
>> +#define DPRINTF(fmt, ...) \
>> +do { \
>> +if (DEBUG_APIC) { \
>> +fprintf(stderr, "APIC: " fmt , ## __VA_ARGS__); \
>> +} \
>> +} while (0)
> 
> Again please use trace-events insead of DPRINTF().
> 
>> +static void extioi_update_irq(void *opaque, int irq_num, int level)
>> +{
>> +loongarch_extioi *s = opaque;
>> +uint8_t  ipnum, cpu;
>> +unsigned long found1, found2;
>> +
>> +ipnum = s->sw_ipmap[irq_num];
>> +cpu   = s->sw_coremap[irq_num];
>> +if (level == 1) {
>> +if (test_bit(irq_num, (void *)s->en_reg8) == false) {
>> +return;
>> +}
>> +bitmap_set((void *)s->coreisr_reg8[cpu], irq_num, 1);
>> +found1 = find_next_bit((void *)&(s->sw_ipisr[cpu][ipnum]),
>> +   EXTIOI_IRQS, 0);
>> +bitmap_set((void *)&(s->sw_ipisr[cpu][ipnum]), irq_num, 1);
>> +
>> +if (found1 >= EXTIOI_IRQS) {
>> +qemu_set_irq(s->parent_irq[cpu][ipnum], level);
>> +}
>> +} else {
>> +bitmap_clear((void *)s->coreisr_reg8[cpu], irq_num, 1);
>> +found1 = find_next_bit((void *)&(s->sw_ipisr[cpu][ipnum]),
>> +   EXTIOI_IRQS, 0);
>> +bitmap_clear((void *)&(s->sw_ipisr[cpu][ipnum]), irq_num, 1);
>> +found2 = find_next_bit((void *)&(s->sw_ipisr[cpu][ipnum]),
>> +   EXTIOI_IRQS, 0);
>> +
>> +if ((found1 < EXTIOI_IRQS) && (found2 >= EXTIOI_IRQS)) {
>> +qemu_set_irq(s->parent_irq[cpu][ipnum], level);
>> +}
>> +}
>> +}
>> +
>> +static void extioi_setirq(void *opaque, int irq, int level)
>> +{
>> +loongarch_extioi *s = opaque;
>> +extioi_update_irq(s, irq, level);
>> +}
>> +
>> +static void extioi_handler(void *opaque, int irq, int level)
>> +{
>> +loongarch_extioi *extioi = (loongarch_extioi *)opaque;
>> +
>> +qemu_set_irq(extioi->irq[irq], level);
>> +}
>> +
>> +static uint32_t extioi_readb(void *opaque, hwaddr addr)
>> +{
>> +loongarch_extioi *state = opaque;
> 
> Add a QOM cast here.
> 
>> +unsigned long offset, reg_count;
>> +uint8_t ret;
>> +int cpu;
>> +
>> +offset = addr & 0x;
>> +
>> +if ((offset >= EXTIOI_ENABLE_START) && (offset < EXTIOI_ENABLE_END)) {
>> +reg_count = (offset - EXTIOI_ENABLE_START);
>> +ret = state->en_reg8[reg_count];
>> +} else if ((offset >= EXTIOI_BOUNCE_START) &&
>> +   (offset < EXTIOI_BOUNCE_END)) {
>> +reg_count = (offset - EXTIOI_BOUNCE_START);
>> +ret = state->bounce_reg8[reg_count];
>> +} else if ((offset >= EXTIOI_COREISR_START) &&
>> +   (offset < EXTIOI_COREISR_END)) {
>> +reg_count = ((offset - EXTIOI_COREISR_START) & 0x1f);
>> +cpu = ((offset - EXTIOI_COREISR_START) >> 8) & 0x3;
>> +ret = state->coreisr_reg8[cpu][reg_count];
>> +} else if ((offset >= EXTIOI_IPMAP_START) &&
>> +   (offset < EXTIOI_IPMAP_END)) {
>> +reg_count = (offset - EXTIOI_IPMAP_START);
>> +ret = state->ipmap_reg8[reg_count];
>> +} else if ((offset >= EXTIOI_COREMAP_START) &&
>> +   (offset < EXTIOI_COREMAP_END)) {
>> +reg_count = (offset - EXTIOI_CO

Re: [PATCH 1/2] virito-balloon: process all in sgs for free_page_vq

2021-11-25 Thread David Hildenbrand
On 25.11.21 03:20, Jason Wang wrote:
> We only process the first in sg which may lead to the bitmap of the
> pages belongs to following sgs were not cleared. This may result more
> pages to be migrated. Fixing this by process all in sgs for
> free_page_vq.
> 
> Signed-off-by: Jason Wang 
> ---
>  hw/virtio/virtio-balloon.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index c6962fcbfe..17de2558cb 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -510,6 +510,7 @@ static bool get_free_page_hints(VirtIOBalloon *dev)
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>  VirtQueue *vq = dev->free_page_vq;
>  bool ret = true;
> +int i;
>  
>  while (dev->block_iothread) {
>  qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
> @@ -544,8 +545,10 @@ static bool get_free_page_hints(VirtIOBalloon *dev)
>  }
>  
>  if (elem->in_num && dev->free_page_hint_status == 
> FREE_PAGE_HINT_S_START) {
> -qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
> -  elem->in_sg[0].iov_len);
> +for (i = 0; i < elem->in_num; i++) {
> +qemu_guest_free_page_hint(elem->in_sg[i].iov_base,
> +  elem->in_sg[i].iov_len);
> +}
>  }
>  
>  out:
> 

Yes, but:

1. Linux never used more than one
2. QEMU never consumed more than one

The spec states:

"(b) The driver maps a series of pages and adds them to the free_page_vq
as individual scatter-gather input buffer entries."

However, the spec was written by someone else (Alex) as the code was
(Wei). The code was there first.

I don't particularly care what to adjust (code or spec). However, to me
it feels more like the spec is slightly wrong and it was intended like
the code is by the original code author.

But then again, I don't particularly care :)

-- 
Thanks,

David / dhildenb




Re: [PATCH 1/2] virito-balloon: process all in sgs for free_page_vq

2021-11-25 Thread Philippe Mathieu-Daudé
On 11/25/21 03:20, Jason Wang wrote:
> We only process the first in sg which may lead to the bitmap of the
> pages belongs to following sgs were not cleared. This may result more
> pages to be migrated. Fixing this by process all in sgs for
> free_page_vq.
> 
> Signed-off-by: Jason Wang 
> ---
>  hw/virtio/virtio-balloon.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)

Typo "virtio" in subject.




Re: [PATCH 1/2] hw/arm/virt: Rename default_bus_bypass_iommu

2021-11-25 Thread Markus Armbruster
Jean-Philippe Brucker  writes:

> Hi Markus,
>
> On Thu, Nov 25, 2021 at 08:11:04AM +0100, Markus Armbruster wrote:
>> Peter, this patch fixes a bug that became a regression when the fix
>> missed 6.1.  It's been stuck on the list since August.  Please have a
>> look, and if it's good, get it merged.  I'll ask the i386/pc maintainers
>> to do the same for PATCH 2.
>
> Both fixes have been merged in v6.2 (9dad363a223d and 739b38630c45)

I'm cross-eyed this morning.  Thanks!




[PATCH] linux-user: move target_signal.h generic definitions to generic/signal.h

2021-11-25 Thread Song Gao
No code change

Suggested-by: Richard Henderson 
Signed-off-by: Song Gao 
---
 linux-user/aarch64/target_signal.h| 18 --
 linux-user/arm/target_signal.h| 18 --
 linux-user/cris/target_signal.h   | 18 --
 linux-user/generic/signal.h   | 16 
 linux-user/hexagon/target_signal.h| 11 ---
 linux-user/i386/target_signal.h   | 18 --
 linux-user/m68k/target_signal.h   | 18 --
 linux-user/microblaze/target_signal.h | 18 --
 linux-user/nios2/target_signal.h  | 16 
 linux-user/openrisc/target_signal.h   | 23 ---
 linux-user/ppc/target_signal.h| 18 --
 linux-user/riscv/target_signal.h  | 12 
 linux-user/s390x/target_signal.h  | 15 ---
 linux-user/sh4/target_signal.h| 18 --
 linux-user/x86_64/target_signal.h | 18 --
 linux-user/xtensa/target_signal.h | 17 -
 16 files changed, 16 insertions(+), 256 deletions(-)

diff --git a/linux-user/aarch64/target_signal.h 
b/linux-user/aarch64/target_signal.h
index 7580d99..40e399d 100644
--- a/linux-user/aarch64/target_signal.h
+++ b/linux-user/aarch64/target_signal.h
@@ -1,24 +1,6 @@
 #ifndef AARCH64_TARGET_SIGNAL_H
 #define AARCH64_TARGET_SIGNAL_H
 
-/* this struct defines a stack used during syscall handling */
-
-typedef struct target_sigaltstack {
-abi_ulong ss_sp;
-abi_int ss_flags;
-abi_ulong ss_size;
-} target_stack_t;
-
-
-/*
- * sigaltstack controls
- */
-#define TARGET_SS_ONSTACK 1
-#define TARGET_SS_DISABLE 2
-
-#define TARGET_MINSIGSTKSZ 2048
-#define TARGET_SIGSTKSZ 8192
-
 #include "../generic/signal.h"
 
 #define TARGET_SEGV_MTEAERR  8  /* Asynchronous ARM MTE error */
diff --git a/linux-user/arm/target_signal.h b/linux-user/arm/target_signal.h
index 1e7fb0c..0e6351d 100644
--- a/linux-user/arm/target_signal.h
+++ b/linux-user/arm/target_signal.h
@@ -1,24 +1,6 @@
 #ifndef ARM_TARGET_SIGNAL_H
 #define ARM_TARGET_SIGNAL_H
 
-/* this struct defines a stack used during syscall handling */
-
-typedef struct target_sigaltstack {
-abi_ulong ss_sp;
-abi_int ss_flags;
-abi_ulong ss_size;
-} target_stack_t;
-
-
-/*
- * sigaltstack controls
- */
-#define TARGET_SS_ONSTACK  1
-#define TARGET_SS_DISABLE  2
-
-#define TARGET_MINSIGSTKSZ 2048
-#define TARGET_SIGSTKSZ8192
-
 #include "../generic/signal.h"
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
diff --git a/linux-user/cris/target_signal.h b/linux-user/cris/target_signal.h
index 83a5155..ab0653f 100644
--- a/linux-user/cris/target_signal.h
+++ b/linux-user/cris/target_signal.h
@@ -1,24 +1,6 @@
 #ifndef CRIS_TARGET_SIGNAL_H
 #define CRIS_TARGET_SIGNAL_H
 
-/* this struct defines a stack used during syscall handling */
-
-typedef struct target_sigaltstack {
-abi_ulong ss_sp;
-abi_int ss_flags;
-abi_ulong ss_size;
-} target_stack_t;
-
-
-/*
- * sigaltstack controls
- */
-#define TARGET_SS_ONSTACK 1
-#define TARGET_SS_DISABLE 2
-
-#define TARGET_MINSIGSTKSZ2048
-#define TARGET_SIGSTKSZ   8192
-
 #include "../generic/signal.h"
 
 #define TARGET_ARCH_HAS_SETUP_FRAME
diff --git a/linux-user/generic/signal.h b/linux-user/generic/signal.h
index 943bc1a..ceaf8a8 100644
--- a/linux-user/generic/signal.h
+++ b/linux-user/generic/signal.h
@@ -55,6 +55,22 @@
 #define TARGET_SIG_UNBLOCK1/* for unblocking signals */
 #define TARGET_SIG_SETMASK2/* for setting the signal mask */
 
+/* this struct defines a stack used during syscall handling */
+typedef struct target_sigaltstack {
+abi_ulong ss_sp;
+abi_int ss_flags;
+abi_ulong ss_size;
+} target_stack_t;
+
+/*
+ * sigaltstack controls
+ */
+#define TARGET_SS_ONSTACK 1
+#define TARGET_SS_DISABLE 2
+
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_SIGSTKSZ8192
+
 /* bit-flags */
 #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */
 /* mask for all SS_xxx flags */
diff --git a/linux-user/hexagon/target_signal.h 
b/linux-user/hexagon/target_signal.h
index 9e0223d..193abac 100644
--- a/linux-user/hexagon/target_signal.h
+++ b/linux-user/hexagon/target_signal.h
@@ -18,17 +18,6 @@
 #ifndef HEXAGON_TARGET_SIGNAL_H
 #define HEXAGON_TARGET_SIGNAL_H
 
-typedef struct target_sigaltstack {
-abi_ulong ss_sp;
-abi_int ss_flags;
-abi_ulong ss_size;
-} target_stack_t;
-
-#define TARGET_SS_ONSTACK 1
-#define TARGET_SS_DISABLE 2
-
-#define TARGET_MINSIGSTKSZ 2048
-
 #include "../generic/signal.h"
 
 #define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1
diff --git a/linux-user/i386/target_signal.h b/linux-user/i386/target_signal.h
index 64d09f2..9315cba 100644
--- a/linux-user/i386/target_signal.h
+++ b/linux-user/i386/target_signal.h
@@ -1,24 +1,6 @@
 #ifndef I386_TARGET_SIGNAL_H
 #define I386_TARGET_SIGNAL_H
 
-/* this struct defines a stack used during syscall

Re: [PATCH 2/2] hw/i386: Rename default_bus_bypass_iommu

2021-11-25 Thread Markus Armbruster
"Michael S. Tsirkin"  writes:

> On Thu, Nov 25, 2021 at 08:12:25AM +0100, Markus Armbruster wrote:
>> Michael & Marcel, this patch fixes a bug that became a regression when
>> the fix missed 6.1.  It's been stuck on the list since August.  Please
>> have a look, and if it's good, get it merged.
>
>
> Thanks for the reminder. Jean-Philippe, Cc maintainers
> will help patches get merged in a timely manner in the future.
>
>>  I just asked the arm/virt
>> maintainer to do the same for PATCH 1.
>
> Some questions below.
>
>> Jean-Philippe Brucker  writes:
>> 
>> > Since commit d8fb7d0969d5 ("vl: switch -M parsing to keyval"), machine
>> > parameter definitions cannot use underscores, because keyval_dashify()
>> > transforms them to dashes and the parser doesn't find the parameter.
>> >
>> > This affects option default_bus_bypass_iommu which was introduced in the
>> > same release:
>> >
>> > $ qemu-system-x86_64 -M q35,default_bus_bypass_iommu=on
>> > qemu-system-x86_64: Property 'pc-q35-6.1-machine.default-bus-bypass-iommu' 
>> > not found
>
> I don't see this behaviour. What did I miss?

I looked at the source and somehow missed that the fix is already there
(commit 739b38630c45).  Sorry for the noise!

[...]




[PATCH] linux-user: target_syscall.h remove definition TARGET_MINSIGSTKSZ

2021-11-25 Thread Song Gao
TARGET_MINSIGSTKSZ has been defined in generic/signal.h
or target_signal.h, We don't need to define it again.

Signed-off-by: Song Gao 
---
 linux-user/aarch64/target_syscall.h| 1 -
 linux-user/alpha/target_syscall.h  | 1 -
 linux-user/arm/target_syscall.h| 1 -
 linux-user/cris/target_syscall.h   | 1 -
 linux-user/hppa/target_syscall.h   | 1 -
 linux-user/i386/target_syscall.h   | 1 -
 linux-user/m68k/target_syscall.h   | 1 -
 linux-user/microblaze/target_syscall.h | 1 -
 linux-user/mips/target_syscall.h   | 1 -
 linux-user/mips64/target_syscall.h | 1 -
 linux-user/nios2/target_syscall.h  | 1 -
 linux-user/openrisc/target_syscall.h   | 1 -
 linux-user/ppc/target_syscall.h| 1 -
 linux-user/riscv/target_syscall.h  | 1 -
 linux-user/s390x/target_syscall.h  | 1 -
 linux-user/sh4/target_syscall.h| 1 -
 linux-user/sparc/target_syscall.h  | 1 -
 linux-user/x86_64/target_syscall.h | 1 -
 18 files changed, 18 deletions(-)

diff --git a/linux-user/aarch64/target_syscall.h 
b/linux-user/aarch64/target_syscall.h
index 76f6c33..508219d 100644
--- a/linux-user/aarch64/target_syscall.h
+++ b/linux-user/aarch64/target_syscall.h
@@ -15,7 +15,6 @@ struct target_pt_regs {
 #endif
 #define UNAME_MINIMUM_RELEASE "3.8.0"
 #define TARGET_CLONE_BACKWARDS
-#define TARGET_MINSIGSTKSZ   2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/alpha/target_syscall.h 
b/linux-user/alpha/target_syscall.h
index 03091bf..fda3a49 100644
--- a/linux-user/alpha/target_syscall.h
+++ b/linux-user/alpha/target_syscall.h
@@ -63,7 +63,6 @@ struct target_pt_regs {
 #define TARGET_UAC_NOPRINT 1
 #define TARGET_UAC_NOFIX   2
 #define TARGET_UAC_SIGBUS  4
-#define TARGET_MINSIGSTKSZ  4096
 #define TARGET_MCL_CURRENT 0x2000
 #define TARGET_MCL_FUTURE  0x4000
 #define TARGET_MCL_ONFAULT 0x8000
diff --git a/linux-user/arm/target_syscall.h b/linux-user/arm/target_syscall.h
index e870ed7..f04f9c9 100644
--- a/linux-user/arm/target_syscall.h
+++ b/linux-user/arm/target_syscall.h
@@ -27,7 +27,6 @@ struct target_pt_regs {
 
 #define TARGET_CLONE_BACKWARDS
 
-#define TARGET_MINSIGSTKSZ 2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/cris/target_syscall.h b/linux-user/cris/target_syscall.h
index 19e1281..0b5ebf1 100644
--- a/linux-user/cris/target_syscall.h
+++ b/linux-user/cris/target_syscall.h
@@ -39,7 +39,6 @@ struct target_pt_regs {
 };
 
 #define TARGET_CLONE_BACKWARDS2
-#define TARGET_MINSIGSTKSZ 2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/hppa/target_syscall.h b/linux-user/hppa/target_syscall.h
index 0018bcb..4b382c1 100644
--- a/linux-user/hppa/target_syscall.h
+++ b/linux-user/hppa/target_syscall.h
@@ -22,7 +22,6 @@ struct target_pt_regs {
 #define UNAME_MACHINE "parisc"
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 #define TARGET_CLONE_BACKWARDS
-#define TARGET_MINSIGSTKSZ   2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/i386/target_syscall.h b/linux-user/i386/target_syscall.h
index ed356b3..aaade06 100644
--- a/linux-user/i386/target_syscall.h
+++ b/linux-user/i386/target_syscall.h
@@ -150,7 +150,6 @@ struct target_vm86plus_struct {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
-#define TARGET_MINSIGSTKSZ 2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/m68k/target_syscall.h b/linux-user/m68k/target_syscall.h
index 23359a6..8d4ddbd 100644
--- a/linux-user/m68k/target_syscall.h
+++ b/linux-user/m68k/target_syscall.h
@@ -20,7 +20,6 @@ struct target_pt_regs {
 #define UNAME_MACHINE "m68k"
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
-#define TARGET_MINSIGSTKSZ 2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/microblaze/target_syscall.h 
b/linux-user/microblaze/target_syscall.h
index 7f653db..43362a1 100644
--- a/linux-user/microblaze/target_syscall.h
+++ b/linux-user/microblaze/target_syscall.h
@@ -49,7 +49,6 @@ struct target_pt_regs {
 };
 
 #define TARGET_CLONE_BACKWARDS
-#define TARGET_MINSIGSTKSZ  2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/mips/target_syscall.h b/linux-user/mips/target_syscall.h
index f590574..7a82661 100644
--- a/linux-user/mips/target_syscall.h
+++ b/linux-user/mips/target_syscall.h
@@ -24,7 +24,6 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
-#define TARGET_MINSIGSTKSZ 2048
 #define TARGET_MCL_CURRENT 1
 #define TARGET_MCL_FUTURE  2
 #define TARGET_MCL_ONFAULT 4
diff --git a/linux-user/mips64/target_syscall.h 
b/linux-user

Re: [PATCH v1 1/2] monitor: move monitor destruction to the very end of qemu cleanup

2021-11-25 Thread Markus Armbruster
Denis Plotnikov  writes:

> This is needed to keep sending DEVICE_DELETED events on qemu cleanup.
> The event may happen in the rcu thread and we're going to flush the rcu queue
> explicitly before qemu exiting in the next patch. So move the monitor
> destruction to the very end of qemu cleanup to be able to send all the events.
>
> Signed-off-by: Denis Plotnikov 
> ---
>  monitor/monitor.c  | 6 ++
>  softmmu/runstate.c | 2 +-
>  2 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/monitor/monitor.c b/monitor/monitor.c
> index 21c7a68758f5..b04ae4850db2 100644
> --- a/monitor/monitor.c
> +++ b/monitor/monitor.c
> @@ -605,11 +605,17 @@ void monitor_data_init(Monitor *mon, bool is_qmp, bool 
> skip_flush,
>  mon->outbuf = g_string_new(NULL);
>  mon->skip_flush = skip_flush;
>  mon->use_io_thread = use_io_thread;
> +/*
> + * take an extra ref to prevent monitor's chardev
> + * from destroying in qemu_chr_cleanup()
> + */
> +object_ref(OBJECT(mon->chr.chr));

I'm not sure we need the comment in the long run.

Taking a reference changes mon->chr.chr from soft reference to hard
reference.  Feels right to me.

Note that mon->chr.chr isn't set here, but earlier.  Unlike the other
parts of @mon.  Because of this it starts as a soft reference, and
hardens only here.

It's set in three places:

1. monitor_init_hmp():

if (!qemu_chr_fe_init(&mon->common.chr, chr, errp)) {
g_free(mon);
return;
}

monitor_data_init(&mon->common, false, false, false);

2. monitor_init_qmp():

if (!qemu_chr_fe_init(&mon->common.chr, chr, errp)) {
g_free(mon);
return;
}
qemu_chr_fe_set_echo(&mon->common.chr, true);

/* Note: we run QMP monitor in I/O thread when @chr supports that */
monitor_data_init(&mon->common, true, false,
  qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_GCONTEXT));

3. qmp_human_monitor_command()

MonitorHMP hmp = {};

monitor_data_init(&hmp.common, false, true, false);

   hmp.common.chr.chr remains null here.  Works, because
   object_ref(OBJECT(NULL)) and object_unref(OBJECT(NULL)) do nothing.

Slightly cleaner, I think: pass the character device as an argument to
monitor_data_init(), take a reference and store it in mon->chr.chr
there.

>  }
>  
>  void monitor_data_destroy(Monitor *mon)
>  {
>  g_free(mon->mon_cpu_path);
> +object_unref(OBJECT(mon->chr.chr));
>  qemu_chr_fe_deinit(&mon->chr, false);
>  if (monitor_is_qmp(mon)) {
>  monitor_data_destroy_qmp(container_of(mon, MonitorQMP, common));
> diff --git a/softmmu/runstate.c b/softmmu/runstate.c
> index 10d9b7365aa7..8d29dd2c00e2 100644
> --- a/softmmu/runstate.c
> +++ b/softmmu/runstate.c
> @@ -819,8 +819,8 @@ void qemu_cleanup(void)
>  tpm_cleanup();
>  net_cleanup();
>  audio_cleanup();
> -monitor_cleanup();
>  qemu_chr_cleanup();
>  user_creatable_cleanup();
> +monitor_cleanup();
>  /* TODO: unref root container, check all devices are ok */
>  }

Monitor cleanup now runs with character devices that have been
"unparented" from the QOM tree.  Paolo, is that okay?




Re: [OpenBIOS] Re: [RFC PATCH 0/2] QEMU/openbios: PPC Software TLB support in the G4 family

2021-11-25 Thread Segher Boessenkool
Hi!

On Thu, Nov 25, 2021 at 01:45:00AM +0100, BALATON Zoltan wrote:
> As for guests, those running on the said PowerMac G4 should have support 
> for these CPUs so maybe you can try some Mac OS X versions (or maybe 

OSX uses hardware pagetables.

> MorphOS but that is not the best for debugging as there's no source 
> available nor any help from its owners but just to see if it boots it may 
> be sufficient, it should work on real PowerMac G4).

I have no idea what MorphOS uses, but I bet HPT as well.  That is
because HPT is fastest in general.  Software TLB reloads are good in
special cases only; the most common is real-time OSes, which can use its
lower guaranteed latency for some special address spaces (and can have a
simpler address map in general).


Segher



Re: [PATCH] linux-user: move target_signal.h generic definitions to generic/signal.h

2021-11-25 Thread Laurent Vivier

Le 25/11/2021 à 09:55, Song Gao a écrit :

No code change

Suggested-by: Richard Henderson 
Signed-off-by: Song Gao 
---
  linux-user/aarch64/target_signal.h| 18 --
  linux-user/arm/target_signal.h| 18 --
  linux-user/cris/target_signal.h   | 18 --
  linux-user/generic/signal.h   | 16 
  linux-user/hexagon/target_signal.h| 11 ---
  linux-user/i386/target_signal.h   | 18 --
  linux-user/m68k/target_signal.h   | 18 --
  linux-user/microblaze/target_signal.h | 18 --
  linux-user/nios2/target_signal.h  | 16 
  linux-user/openrisc/target_signal.h   | 23 ---
  linux-user/ppc/target_signal.h| 18 --
  linux-user/riscv/target_signal.h  | 12 
  linux-user/s390x/target_signal.h  | 15 ---
  linux-user/sh4/target_signal.h| 18 --
  linux-user/x86_64/target_signal.h | 18 --
  linux-user/xtensa/target_signal.h | 17 -
  16 files changed, 16 insertions(+), 256 deletions(-)



Reviewed-by: Laurent Vivier 




Re: [PATCH] linux-user: target_syscall.h remove definition TARGET_MINSIGSTKSZ

2021-11-25 Thread Laurent Vivier

Le 25/11/2021 à 09:58, Song Gao a écrit :

TARGET_MINSIGSTKSZ has been defined in generic/signal.h
or target_signal.h, We don't need to define it again.

Signed-off-by: Song Gao 
---
  linux-user/aarch64/target_syscall.h| 1 -
  linux-user/alpha/target_syscall.h  | 1 -
  linux-user/arm/target_syscall.h| 1 -
  linux-user/cris/target_syscall.h   | 1 -
  linux-user/hppa/target_syscall.h   | 1 -
  linux-user/i386/target_syscall.h   | 1 -
  linux-user/m68k/target_syscall.h   | 1 -
  linux-user/microblaze/target_syscall.h | 1 -
  linux-user/mips/target_syscall.h   | 1 -
  linux-user/mips64/target_syscall.h | 1 -
  linux-user/nios2/target_syscall.h  | 1 -
  linux-user/openrisc/target_syscall.h   | 1 -
  linux-user/ppc/target_syscall.h| 1 -
  linux-user/riscv/target_syscall.h  | 1 -
  linux-user/s390x/target_syscall.h  | 1 -
  linux-user/sh4/target_syscall.h| 1 -
  linux-user/sparc/target_syscall.h  | 1 -
  linux-user/x86_64/target_syscall.h | 1 -
  18 files changed, 18 deletions(-)



Reviewed-by: Laurent Vivier 




Re: Other pages for docs/devel/ [Was: Re: ... Update URLs of "SubmitAPatch" ...]

2021-11-25 Thread Kashyap Chamarthy
On Wed, Nov 24, 2021 at 08:35:35PM +0100, Paolo Bonzini wrote:
> On 11/19/21 11:15, Kashyap Chamarthy wrote:
> > Noted; so these two pages:
> > 
> > -https://www.qemu.org/contribute/security-process/
> > -https://www.qemu.org/contribute/report-a-bug/

I posted a series for above two here:

https://lists.nongnu.org/archive/html/qemu-devel/2021-11/msg04936.html
[PATCH 0/2] rSTify 'report-a-bug' and 'security-process'; move them
to QEMU Git

> > What about these two other pages (which are also linked to from the
> > "contribute"[1] part of the website)?  Do they belong to docs/devel?  If
> > not, should they be part of 'qemu-web', instead of the wiki?
> > 
> >  -https://wiki.qemu.org/Contribute/FAQ
> >  -https://wiki.qemu.org/Documentation/GettingStartedDevelopers
> 
> Yes, I suppose all of these can be moved to docs/devel.

I thought so; will send them as part of v2 of the above series.

-- 
/kashyap




Re: [PATCH] linux-user: move target_signal.h generic definitions to generic/signal.h

2021-11-25 Thread WANG Xuerui
Hi,

On 2021/11/25 16:55, Song Gao wrote:
> diff --git a/linux-user/generic/signal.h b/linux-user/generic/signal.h
> index 943bc1a..ceaf8a8 100644
> --- a/linux-user/generic/signal.h
> +++ b/linux-user/generic/signal.h
> @@ -55,6 +55,22 @@
>  #define TARGET_SIG_UNBLOCK1/* for unblocking signals */
>  #define TARGET_SIG_SETMASK2/* for setting the signal mask */
>  
> +/* this struct defines a stack used during syscall handling */
> +typedef struct target_sigaltstack {
> +abi_ulong ss_sp;
> +abi_int ss_flags;
> +abi_ulong ss_size;
> +} target_stack_t;
> +
> +/*
> + * sigaltstack controls
> + */
> +#define TARGET_SS_ONSTACK 1
> +#define TARGET_SS_DISABLE 2
> +
> +#define TARGET_MINSIGSTKSZ 2048

While all the architectures you de-duplicated here have
TARGET_MINSIGSTKSZ as 2048, some others specify a different value
(mostly 4096, e.g. alpha), as can be seen in your next patch (which
should belong to this series, btw). Do you mean to change semantics
here? Or you might have to allow arches to override this value.

> +#define TARGET_SIGSTKSZ8192
> +
>  /* bit-flags */
>  #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */
>  /* mask for all SS_xxx flags */



Re: [PATCH v11 19/26] linux-user: Add LoongArch signal support

2021-11-25 Thread Richard Henderson

On 11/25/21 4:03 AM, gaosong wrote:

I also see that TARGET_SIGSTKSZ is not used.  I think  we should delete it.


Agreed.  This constant will have been baked into the guest executable.


r~



Re: [PATCH] linux-user: target_syscall.h remove definition TARGET_MINSIGSTKSZ

2021-11-25 Thread Philippe Mathieu-Daudé
On 11/25/21 09:58, Song Gao wrote:
> TARGET_MINSIGSTKSZ has been defined in generic/signal.h
> or target_signal.h, We don't need to define it again.
> 
> Signed-off-by: Song Gao 
> ---
>  linux-user/aarch64/target_syscall.h| 1 -
>  linux-user/alpha/target_syscall.h  | 1 -
>  linux-user/arm/target_syscall.h| 1 -
>  linux-user/cris/target_syscall.h   | 1 -
>  linux-user/hppa/target_syscall.h   | 1 -
>  linux-user/i386/target_syscall.h   | 1 -
>  linux-user/m68k/target_syscall.h   | 1 -
>  linux-user/microblaze/target_syscall.h | 1 -
>  linux-user/mips/target_syscall.h   | 1 -
>  linux-user/mips64/target_syscall.h | 1 -
>  linux-user/nios2/target_syscall.h  | 1 -
>  linux-user/openrisc/target_syscall.h   | 1 -
>  linux-user/ppc/target_syscall.h| 1 -
>  linux-user/riscv/target_syscall.h  | 1 -
>  linux-user/s390x/target_syscall.h  | 1 -
>  linux-user/sh4/target_syscall.h| 1 -
>  linux-user/sparc/target_syscall.h  | 1 -
>  linux-user/x86_64/target_syscall.h | 1 -
>  18 files changed, 18 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 



[PATCH v2 0/2] vdpa: Fix SIGSEGV on failed vdpa devices

2021-11-25 Thread Eugenio Pérez
Qemu falls back on userland handlers even if vhost-user and vhost-vdpa
cases. These assumes a tap device can handle the packets.

If a vdpa device fail to start, it can trigger a sigsegv because of
that. Add dummy receivers that return no progress so it can keep
running.

Tested with a modified version of vp_vdpa to fail negotiation.

v2:
* Replace dummy receive_{iov,raw} with receive callback.
* Delete fix indentation commit, we don't touch that code anymore.

Eugenio Pérez (2):
  vdpa: Add dummy receive callback
  virtio-net: Fix log message

 hw/net/virtio-net.c | 11 ++-
 net/vhost-vdpa.c|  8 
 2 files changed, 14 insertions(+), 5 deletions(-)

-- 
2.27.0





[PATCH v2 2/2] virtio-net: Fix log message

2021-11-25 Thread Eugenio Pérez
The message has never been true in the case of non tap networking, so
only tell that userland networking will be used if possible.

Signed-off-by: Eugenio Pérez 
---
 hw/net/virtio-net.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f2014d5ea0..d6c98c3c2d 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -245,6 +245,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 NetClientState *nc = qemu_get_queue(n->nic);
 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 int cvq = n->max_ncs - n->max_queue_pairs;
+bool tap_backend = nc->peer->info->type == NET_CLIENT_DRIVER_TAP;
 
 if (!get_vhost_net(nc->peer)) {
 return;
@@ -258,9 +259,9 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 int r, i;
 
 if (n->needs_vnet_hdr_swap) {
-error_report("backend does not support %s vnet headers; "
- "falling back on userspace virtio",
- virtio_is_big_endian(vdev) ? "BE" : "LE");
+error_report("backend does not support %s vnet headers%s",
+virtio_is_big_endian(vdev) ? "BE" : "LE",
+tap_backend ? "; falling back on userspace virtio" : "");
 return;
 }
 
@@ -288,8 +289,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t 
status)
 n->vhost_started = 1;
 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
 if (r < 0) {
-error_report("unable to start vhost net: %d: "
- "falling back on userspace virtio", -r);
+error_report("unable to start vhost net: %d%s", -r,
+   tap_backend ? " falling back on userspace virtio" : "");
 n->vhost_started = 0;
 }
 } else {
-- 
2.27.0




Re: [PATCH v3 1/3] target/ppc: Fixed call to deferred exception

2021-11-25 Thread Richard Henderson

On 11/24/21 6:25 PM, Lucas Mateus Castro (alqotel) wrote:

mtfsf, mtfsfi and mtfsb1 instructions call helper_float_check_status
after updating the value of FPSCR, but helper_float_check_status
checks fp_status and fp_status isn't updated based on FPSCR and
since the value of fp_status is reset earlier in the instruction,
it's always 0.

Because of this helper_float_check_status would change the FI bit to 0
as this bit checks if the last operation was inexact and
float_flag_inexact is always 0.

These instructions also don't throw exceptions correctly since
helper_float_check_status throw exceptions based on fp_status.

This commit created a new helper, helper_fpscr_check_status that checks
FPSCR value instead of fp_status and checks for a larger variety of
exceptions than do_float_check_status.

Since fp_status isn't used, gen_reset_fpstatus() was removed.

The hardware used to compare QEMU's behavior to was a Power9.

Signed-off-by: Lucas Mateus Castro (alqotel) 
---
  target/ppc/fpu_helper.c| 48 ++
  target/ppc/helper.h|  1 +
  target/ppc/translate/fp-impl.c.inc |  9 ++
  3 files changed, 52 insertions(+), 6 deletions(-)


Reviewed-by: Richard Henderson 


r~



Re: [PATCH v6 01/16] linux-user: Untabify all safe-syscall.inc.S

2021-11-25 Thread Warner Losh
On Tue, Nov 23, 2021 at 10:38 AM Richard Henderson <
richard.hender...@linaro.org> wrote:

> Suggested-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  linux-user/host/aarch64/safe-syscall.inc.S | 110 +++---
>  linux-user/host/arm/safe-syscall.inc.S | 134 -
>  linux-user/host/i386/safe-syscall.inc.S| 158 ++---
>  linux-user/host/ppc64/safe-syscall.inc.S   | 134 -
>  linux-user/host/riscv/safe-syscall.inc.S   | 114 +++
>  linux-user/host/s390x/safe-syscall.inc.S   | 140 +-
>  linux-user/host/x86_64/safe-syscall.inc.S  |   2 +-
>  7 files changed, 396 insertions(+), 396 deletions(-)
>

Reviewed-by: Warner Losh 


[PATCH v2 1/2] vdpa: Add dummy receive callback

2021-11-25 Thread Eugenio Pérez
Qemu falls back on userland handlers even in the vhost-user and
vhost-vdpa cases. These assume a tap device can handle the packets.

If a vdpa device fails to start, it can trigger a SIGSEGV because of
that. Add a dummy receiver that returns no progress so it can keep
running.

Fixes: 1e0a84ea49 ("vhost-vdpa: introduce vhost-vdpa net client")
Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2e3c22a8c7..25dd6dd975 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -170,9 +170,17 @@ static bool vhost_vdpa_check_peer_type(NetClientState *nc, 
ObjectClass *oc,
 return true;
 }
 
+/** Dummy receive in case qemu falls back to userland tap networking */
+static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
+  size_t size)
+{
+return 0;
+}
+
 static NetClientInfo net_vhost_vdpa_info = {
 .type = NET_CLIENT_DRIVER_VHOST_VDPA,
 .size = sizeof(VhostVDPAState),
+.receive = vhost_vdpa_receive,
 .cleanup = vhost_vdpa_cleanup,
 .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
 .has_ufo = vhost_vdpa_has_ufo,
-- 
2.27.0




Re: [PATCH v3 2/3] test/tcg/ppc64le: test mtfsf

2021-11-25 Thread Richard Henderson

On 11/24/21 6:25 PM, Lucas Mateus Castro (alqotel) wrote:

Added tests for the mtfsf to check if FI bit of FPSCR is being set
and if exception calls are being made correctly.

Signed-off-by: Lucas Mateus Castro (alqotel) 
---
  tests/tcg/ppc64/Makefile.target   |  1 +
  tests/tcg/ppc64le/Makefile.target |  1 +
  tests/tcg/ppc64le/mtfsf.c | 61 +++
  3 files changed, 63 insertions(+)
  create mode 100644 tests/tcg/ppc64le/mtfsf.c


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v6 04/16] linux-user/host/mips: Add safe-syscall.inc.S

2021-11-25 Thread Warner Losh
On Tue, Nov 23, 2021 at 10:38 AM Richard Henderson <
richard.hender...@linaro.org> wrote:

> Signed-off-by: Richard Henderson 
> ---
>  linux-user/host/mips/hostdep.h  |   3 +
>  linux-user/host/mips/safe-syscall.inc.S | 123 
>  2 files changed, 126 insertions(+)
>  create mode 100644 linux-user/host/mips/safe-syscall.inc.S
>

Reviewed-by: Warner Losh 


Re: [PATCH v6 03/16] linux-user: Move syscall error detection into safe_syscall_base

2021-11-25 Thread Warner Losh
On Tue, Nov 23, 2021 at 10:38 AM Richard Henderson <
richard.hender...@linaro.org> wrote:

> The current api from safe_syscall_base() is to return -errno, which is
> the interface provided by *some* linux kernel abis.  The wrapper macro,
> safe_syscall(), detects error, stores into errno, and returns -1, to
> match the api of the system syscall().
>
> For those kernel abis that do not return -errno natively, this leads
> to double syscall error detection.  E.g. Linux ppc64, which sets the
> SO flag for error.
>
> Simplify the usage from C by moving the error detection into assembly,
> and usage from assembly by providing a C helper with which to set errno.
>
> Signed-off-by: Richard Henderson 
> ---
>  linux-user/safe-syscall.h  | 16 +++---
>  linux-user/safe-syscall-error.c| 28 
>  linux-user/host/aarch64/safe-syscall.inc.S | 20 ++--
>  linux-user/host/arm/safe-syscall.inc.S | 27 ++--
>  linux-user/host/i386/safe-syscall.inc.S| 37 +++---
>  linux-user/host/ppc64/safe-syscall.inc.S   | 20 ++--
>  linux-user/host/riscv/safe-syscall.inc.S   | 20 ++--
>  linux-user/host/s390x/safe-syscall.inc.S   | 32 ---
>  linux-user/host/x86_64/safe-syscall.inc.S  | 29 +
>  linux-user/meson.build |  1 +
>  10 files changed, 145 insertions(+), 85 deletions(-)
>  create mode 100644 linux-user/safe-syscall-error.c
>

Reviewed-by: Warner Losh 

Note: My s390 assembler skills are quite weak, but it seems consistent with
the rest of the code. I'm less sure about the riscv and ppc64, but am quite
confident in the arm, aarch64, x86 and meson change. Not sure how to
otherwise
signal that when the review is uneven.


> diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h
> index aaa9ffc0e2..97837faddb 100644
> --- a/linux-user/safe-syscall.h
> +++ b/linux-user/safe-syscall.h
> @@ -127,21 +127,15 @@
>  #ifdef HAVE_SAFE_SYSCALL
>  /* The core part of this function is implemented in assembly */
>  extern long safe_syscall_base(int *pending, long number, ...);
> +extern long safe_syscall_set_errno_tail(int value);
> +
>  /* These are defined by the safe-syscall.inc.S file */
>  extern char safe_syscall_start[];
>  extern char safe_syscall_end[];
>
> -#define safe_syscall(...)   \
> -({  \
> -long ret_;  \
> -int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
> -ret_ = safe_syscall_base(psp_, __VA_ARGS__);\
> -if (is_error(ret_)) {   \
> -errno = -ret_;  \
> -ret_ = -1;  \
> -}   \
> -ret_;   \
> -})
> +#define safe_syscall(...)
>  \
> +safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending,
> \
> +  __VA_ARGS__)
>
>  #else
>
> diff --git a/linux-user/safe-syscall-error.c
> b/linux-user/safe-syscall-error.c
> new file mode 100644
> index 00..d7e2700f81
> --- /dev/null
> +++ b/linux-user/safe-syscall-error.c
> @@ -0,0 +1,28 @@
> +/*
> + * safe-syscall-error.c: errno setting fragment
> + * This is intended to be invoked by safe-syscall.S
> + *
> + * Written by Richard Henderson 
> + * Copyright (C) 2021 Red Hat, Inc.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hostdep.h"
> +#include "safe-syscall.h"
> +
> +#ifdef HAVE_SAFE_SYSCALL
> +/*
> + * This is intended to be invoked via tail-call on the error path
> + * from the assembly in host/arch/safe-syscall.inc.S.  This takes
> + * care of the host specific addressing of errno.
> + * Return -1 to finalize the return value for safe_syscall_base.
> + */
> +long safe_syscall_set_errno_tail(int value)
> +{
> +errno = value;
> +return -1;
> +}
> +#endif
> diff --git a/linux-user/host/aarch64/safe-syscall.inc.S
> b/linux-user/host/aarch64/safe-syscall.inc.S
> index e2e726ef55..76a0a18a6c 100644
> --- a/linux-user/host/aarch64/safe-syscall.inc.S
> +++ b/linux-user/host/aarch64/safe-syscall.inc.S
> @@ -22,15 +22,12 @@
>   * first argument an 'int *' to the signal_pending flag, the
>   * second one the system call number (as a 'long'), and all
> further
>   * arguments being syscall arguments (also 'long').
> - * We return a long which is the syscall's return value, which
> - * may be negative-errno on failure. Conversion to the
> - * -1-and-errno-set convention is done b

Re: Give the fosshost.org VM a purpose or a retirement

2021-11-25 Thread Warner Losh
On Wed, Nov 24, 2021 at 7:56 AM Philippe Mathieu-Daudé 
wrote:

> On 11/24/21 15:16, Cleber Rosa wrote:
> > Hi,
> >
> > Fosshost.org was kind enough to supply the QEMU project with a public
> > VM hosted by them.  The original use case we anticipated was to set up
> > a GitLab CI runner, because we assumed the VM was KVM capable, but
> > that turned out not to be the case.
> >
> > So, at this point, adding it as a GitLab CI runner would not add any
> > significant improvement over the shared runners already provided, and
> > it would require more maintenance effort.
> >
> > If there are any ideas for making use of this resource, and volunteers
> > to configure and maintain it, please let me know.
> >
> > Otherwise, it seems fair to relinquish the resource back to Fosshost.org.
>
> Cc'ing Warner, since for FreeBSD testing we use Cirrus-CI (via
> libvirt-ci and have sometime hit GitLab timeout). Series [*]
> provides most of what is required to register a FreeBSD runner.
> It could be useful to run the user-mode tests.
>

Indeed. I'd forgotten about this series. I'll take a look at this and/or
the fosshost.org stuff...

Warner


Re: Give the fosshost.org VM a purpose or a retirement

2021-11-25 Thread Daniel P . Berrangé
On Wed, Nov 24, 2021 at 03:55:55PM +0100, Philippe Mathieu-Daudé wrote:
> On 11/24/21 15:16, Cleber Rosa wrote:
> > Hi,
> > 
> > Fosshost.org was kind enough to supply the QEMU project with a public
> > VM hosted by them.  The original use case we anticipated was to set up
> > a GitLab CI runner, because we assumed the VM was KVM capable, but
> > that turned out not to be the case.
> > 
> > So, at this point, adding it as a GitLab CI runner would not add any
> > significant improvement over the shared runners already provided, and
> > it would require more maintenance effort.
> > 
> > If there are any ideas for making use of this resource, and volunteers
> > to configure and maintain it, please let me know.
> > 
> > Otherwise, it seems fair to relinquish the resource back to Fosshost.org.
> 
> Cc'ing Warner, since for FreeBSD testing we use Cirrus-CI (via
> libvirt-ci and have sometime hit GitLab timeout)

We're not seeing timeouts since we dropped the redundant jobs that
were running on master.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v4 1/3] hw/block/fdc: Extract blk_create_empty_drive()

2021-11-25 Thread Hanna Reitz

On 24.11.21 17:15, Philippe Mathieu-Daudé wrote:

We are going to re-use this code in the next commit,
so extract it as a new blk_create_empty_drive() function.

Inspired-by: Hanna Reitz 


:)


Signed-off-by: Philippe Mathieu-Daudé 
---
  hw/block/fdc.c | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH v5 07/18] target/riscv: setup everything so that riscv128-softmmu compiles

2021-11-25 Thread Alistair Francis
On Wed, Nov 24, 2021 at 5:33 PM Philippe Mathieu-Daudé  wrote:
>
> Hi Frédéric,
>
> On 11/24/21 07:55, Frédéric Pétrot wrote:
> > On 24/11/2021 07:12, Alistair Francis wrote:
> >> On Sat, Nov 13, 2021 at 1:16 AM Frédéric Pétrot
> >>  wrote:
> >>>
> >>> This patch is kind of a mess because several files have to be slightly
> >>> modified to allow for a new target. In the current status, we have done
> >>> our best to have RV64 and RV128 under the same RV64 umbrella, but there
> >>> is still work to do to have a single executable for both.
> >>> In particular, we have no atomic accesses for aligned 128-bit addresses.
> >>>
> >>> Once this patch applied, adding risc128-sofmmu to --target-list produces
> >>> a (no so useful yet) executable.
> >>
> >> I can't remember if we discussed this before, but do we need the
> >> riscv128-sofmmu executable? Can we instead just use a riscv64-sofmmu
> >> executable?
> >
> >   Hello Alistair,
> >   Richard was also advocating for a single executable, but pointed out that
> >   we need to disable mttcg because there is a need for specific tcg
> > support for
> >   128-bit aligned atomics.
> >   Given my understanding of that part of QEMU, I choose the easy way to
> > disable
> >   it once and for all at compile time until we have that.
>
>
> In rv128_base_cpu_init():
>
>   if (qemu_tcg_mttcg_enabled) {
>   /* Missing 128-bit aligned atomics */
>   error_report("128-bit RISC-V currently does not work"
>" with Multi Threaded TCG. Please use:"
>" -accel tcg,thread=single");
>   exit(EXIT_FAILURE);
>   }

That seems like a good option! I think we could add this to the CPU
realise function.

The problem with a riscv128-softmmu executable is that it's hard to get
rid of in the future. We are very slowly moving towards a single
executable and adding a new one means we are stuck with it for a
while.

Alistair

>
> Regards,
>
> Phil.



Re: [PATCH v4 3/3] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

2021-11-25 Thread Hanna Reitz

On 24.11.21 17:15, Philippe Mathieu-Daudé wrote:

Without the previous commit, when running 'make check-qtest-i386'
with QEMU configured with '--enable-sanitizers' we get:

   AddressSanitizer:DEADLYSIGNAL
   =
   ==287878==ERROR: AddressSanitizer: SEGV on unknown address 0x0344
   ==287878==The signal is caused by a WRITE memory access.
   ==287878==Hint: address points to the zero page.
   #0 0x564b2e5bac27 in blk_inc_in_flight block/block-backend.c:1346:5
   #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5
   #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11
   #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17
   #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9
   #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9

Add the reproducer for CVE-2021-20196.

Suggested-by: Alexander Bulekov 
Reviewed-by: Darren Kenny 
Signed-off-by: Philippe Mathieu-Daudé 
---
  tests/qtest/fdc-test.c | 38 ++
  1 file changed, 38 insertions(+)

diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
index 26b69f7c5cd..8f6eee84a47 100644
--- a/tests/qtest/fdc-test.c
+++ b/tests/qtest/fdc-test.c
@@ -32,6 +32,9 @@
  /* TODO actually test the results and get rid of this */
  #define qmp_discard_response(...) qobject_unref(qmp(__VA_ARGS__))
  
+#define DRIVE_FLOPPY_BLANK \

+"-drive 
if=floppy,file=null-co://,file.read-zeroes=on,format=raw,size=1440k"
+
  #define TEST_IMAGE_SIZE 1440 * 1024
  
  #define FLOPPY_BASE 0x3f0

@@ -546,6 +549,40 @@ static void fuzz_registers(void)
  }
  }
  
+static bool qtest_check_clang_sanitizer(void)

+{
+#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
+return true;
+#else
+g_test_skip("QEMU not configured using --enable-sanitizers");
+return false;
+#endif
+}
+static void test_cve_2021_20196(void)
+{
+QTestState *s;
+
+if (!qtest_check_clang_sanitizer()) {
+return;
+}
+
+s = qtest_initf("-nographic -m 32M -nodefaults " DRIVE_FLOPPY_BLANK);
+
+qtest_outw(s, 0x3f4, 0x0500);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outw(s, 0x3f1, 0x0400);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outw(s, 0x3f4, 0x);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_outb(s, 0x3f5, 0x01);
+qtest_outw(s, 0x3f1, 0x0500);
+qtest_outb(s, 0x3f5, 0x00);
+qtest_quit(s);
+}
+


Now this works as a reproducer for me, but... this is a completely 
different I/O sequence now, right?


Can’t complain, though, I didn’t understand the previous one, I can’t 
claim I need to understand this one or why they’re different.


All the rest looks good to me, so all in all:

Reviewed-by: Hanna Reitz 




Re: [PATCH v2 12/15] hw/nvme: Initialize capability structures for primary/secondary controllers

2021-11-25 Thread Łukasz Gieryk
On Wed, Nov 24, 2021 at 03:26:30PM +0100, Łukasz Gieryk wrote:
> On Wed, Nov 24, 2021 at 09:04:31AM +0100, Klaus Jensen wrote:
> > On Nov 16 16:34, Łukasz Gieryk wrote:
> > > With four new properties:
> > >  - sriov_v{i,q}_flexible,
> > >  - sriov_max_v{i,q}_per_vf,
> > > one can configure the number of available flexible resources, as well as
> > > the limits. The primary and secondary controller capability structures
> > > are initialized accordingly.
> > > 
> > > Since the number of available queues (interrupts) now varies between
> > > VF/PF, BAR size calculation is also adjusted.
> > > 
> > > Signed-off-by: Łukasz Gieryk 
> > > ---
> > >  hw/nvme/ctrl.c   | 138 ---
> > >  hw/nvme/nvme.h   |   4 ++
> > >  include/block/nvme.h |   5 ++
> > >  3 files changed, 140 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > > index f8f5dfe204..f589ffde59 100644
> > > --- a/hw/nvme/ctrl.c
> > > +++ b/hw/nvme/ctrl.c
> > > @@ -6358,13 +6444,40 @@ static void nvme_init_state(NvmeCtrl *n)
> > >  n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > >  n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
> > >  
> > > -list->numcntl = cpu_to_le16(n->params.sriov_max_vfs);
> > > -for (i = 0; i < n->params.sriov_max_vfs; i++) {
> > > +list->numcntl = cpu_to_le16(max_vfs);
> > > +for (i = 0; i < max_vfs; i++) {
> > >  sctrl = &list->sec[i];
> > >  sctrl->pcid = cpu_to_le16(n->cntlid);
> > >  }
> > >  
> > >  cap->cntlid = cpu_to_le16(n->cntlid);
> > > +cap->crt = NVME_CRT_VQ | NVME_CRT_VI;
> > > +
> > > +if (pci_is_vf(&n->parent_obj)) {
> > > +cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs);
> > > +} else {
> > > +cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs -
> > > + n->params.sriov_vq_flexible);
> > > +cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible);
> > > +cap->vqrfap = cap->vqfrt;
> > > +cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
> > > +cap->vqfrsm = n->params.sriov_max_vq_per_vf ?
> > > +cpu_to_le16(n->params.sriov_max_vq_per_vf) :
> > > +cap->vqprt;
> > 
> > That this defaults to VQPRT doesn't seem right. It should default to
> > VQFRT. Does not make sense to report a maximum number of assignable
> > flexible resources that are bigger than the number of flexible resources
> > available.
> 
> I’ve explained in on of v1 threads why I think using the current default
> is better than VQPRT.
> 
> What you’ve noticed is indeed an inconvenience, but it’s – at least in
> my opinion – part of the design. What matters is the current number of
> unassigned flexible resources. It may be lower than VQFRSM due to
> multiple reasons:
>  1) resources are bound to PF, 
>  2) resources are bound to other VFs,
>  3) resources simply don’t exist (not baked in silicone: VQFRT < VQFRSM).
> 
> If 1) and 2) are allowed to happen, and the user must be aware of that,
> then why 3) shouldn’t?
> 

I’ve done some more thinking, and now I’m not happy with my version, nor
the suggested VQPRT.

How about using this formula instead?:

v{q,i}frsm = sriov_max_v{I,q}_per_vf ? sriov_max_v{I,q}_per_vf :
 floor(sriov_v{i,q}_flexible / sriov_max_vfs)

v{q,i}frsm would end up with values similar/proportional to those
reported by and actual SR-IOV-capable device available on the market.




[PATCH-for-6.2] docs: add a word of caution on x-native-hotplug property for pcie-root-ports

2021-11-25 Thread Ani Sinha
x-native-hotplug property, when used in order to disable the HPC bit on the PCIE
root ports, can lead to unexpected results from the guest operating system.
Users are strongly advised not to touch this property in order to manipulate the
HPC bit. Add a word of caution in the pcie.txt doc file to document this.

Signed-off-by: Ani Sinha 
---
 docs/pcie.txt | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/docs/pcie.txt b/docs/pcie.txt
index 89e3502075..e1f99f725f 100644
--- a/docs/pcie.txt
+++ b/docs/pcie.txt
@@ -262,11 +262,26 @@ PCI Express Root Ports (and PCI Express Downstream Ports).
 Port, which may come handy for hot-plugging another device.
 
 
-5.3 Hot-plug example:
+5.2 Hot-plug example:
 Using HMP: (add -monitor stdio to QEMU command line)
   device_add ,id=,bus=
 
 
+5.3 A word of caution using hotplug on PCI Express Root Ports:
+Starting Qemu version 6.2, PCI Express Root ports have a property
+"x-native-hotplug" ("native-hotplug" for Qemu version 6.1), that can be used to
+enable or disable hotplug on that port. For example:
+
+-device pcie-root-port,x-native-hotplug=off,... etc.
+
+The "x-" prefix indicates that this property is highly experimental and can
+lead to unexpected results from the guest operating system if users try to use
+it to alter the native hotplug on the port. It also means that the property
+name and its behavior is liable to change in the future and is not expected to
+be stable across Qemu versions. Therefore, end users are advised not to change
+the value of this option from its default set value or use it in the Qemu
+command line.
+
 6. Device assignment
 
 Host devices are mostly PCI Express and should be plugged only into
-- 
2.25.1




Re: [PATCH v4 3/3] tests/qtest/fdc-test: Add a regression test for CVE-2021-20196

2021-11-25 Thread Philippe Mathieu-Daudé
On 11/25/21 12:57, Hanna Reitz wrote:
> On 24.11.21 17:15, Philippe Mathieu-Daudé wrote:
>> Without the previous commit, when running 'make check-qtest-i386'
>> with QEMU configured with '--enable-sanitizers' we get:
>>
>>    AddressSanitizer:DEADLYSIGNAL
>>    =
>>    ==287878==ERROR: AddressSanitizer: SEGV on unknown address
>> 0x0344
>>    ==287878==The signal is caused by a WRITE memory access.
>>    ==287878==Hint: address points to the zero page.
>>    #0 0x564b2e5bac27 in blk_inc_in_flight
>> block/block-backend.c:1346:5
>>    #1 0x564b2e5bb228 in blk_pwritev_part block/block-backend.c:1317:5
>>    #2 0x564b2e5bcd57 in blk_pwrite block/block-backend.c:1498:11
>>    #3 0x564b2ca1cdd3 in fdctrl_write_data hw/block/fdc.c:2221:17
>>    #4 0x564b2ca1b2f7 in fdctrl_write hw/block/fdc.c:829:9
>>    #5 0x564b2dc49503 in portio_write softmmu/ioport.c:201:9
>>
>> Add the reproducer for CVE-2021-20196.
>>
>> Suggested-by: Alexander Bulekov 
>> Reviewed-by: Darren Kenny 
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>   tests/qtest/fdc-test.c | 38 ++
>>   1 file changed, 38 insertions(+)
>>
>> diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
>> index 26b69f7c5cd..8f6eee84a47 100644
>> --- a/tests/qtest/fdc-test.c
>> +++ b/tests/qtest/fdc-test.c
>> @@ -32,6 +32,9 @@
>>   /* TODO actually test the results and get rid of this */
>>   #define qmp_discard_response(...) qobject_unref(qmp(__VA_ARGS__))
>>   +#define DRIVE_FLOPPY_BLANK \
>> +    "-drive
>> if=floppy,file=null-co://,file.read-zeroes=on,format=raw,size=1440k"
>> +
>>   #define TEST_IMAGE_SIZE 1440 * 1024
>>     #define FLOPPY_BASE 0x3f0
>> @@ -546,6 +549,40 @@ static void fuzz_registers(void)
>>   }
>>   }
>>   +static bool qtest_check_clang_sanitizer(void)
>> +{
>> +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
>> +    return true;
>> +#else
>> +    g_test_skip("QEMU not configured using --enable-sanitizers");
>> +    return false;
>> +#endif
>> +}
>> +static void test_cve_2021_20196(void)
>> +{
>> +    QTestState *s;
>> +
>> +    if (!qtest_check_clang_sanitizer()) {
>> +    return;
>> +    }
>> +
>> +    s = qtest_initf("-nographic -m 32M -nodefaults "
>> DRIVE_FLOPPY_BLANK);
>> +
>> +    qtest_outw(s, 0x3f4, 0x0500);
>> +    qtest_outb(s, 0x3f5, 0x00);
>> +    qtest_outb(s, 0x3f5, 0x00);
>> +    qtest_outw(s, 0x3f4, 0x);
>> +    qtest_outb(s, 0x3f5, 0x00);
>> +    qtest_outw(s, 0x3f1, 0x0400);
>> +    qtest_outw(s, 0x3f4, 0x);
>> +    qtest_outw(s, 0x3f4, 0x);
>> +    qtest_outb(s, 0x3f5, 0x00);
>> +    qtest_outb(s, 0x3f5, 0x01);
>> +    qtest_outw(s, 0x3f1, 0x0500);
>> +    qtest_outb(s, 0x3f5, 0x00);
>> +    qtest_quit(s);
>> +}
>> +
> 
> Now this works as a reproducer for me, but... this is a completely
> different I/O sequence now, right?

The patch Alexander sent [*] was indeed not working, but I could
manually reproduce, then I figure while the commit *description*
was working, the patch *content* was not accurate. This patch uses
the commit description.

[1] https://www.mail-archive.com/qemu-block@nongnu.org/msg82825.html

> Can’t complain, though, I didn’t understand the previous one, I can’t
> claim I need to understand this one or why they’re different.

Same here =)

> All the rest looks good to me, so all in all:
> 
> Reviewed-by: Hanna Reitz 

Thank you!




Re: [PATCH v5 3/3] cpus-common: implement dirty limit on vCPU

2021-11-25 Thread Hyman Huang




在 2021/11/24 23:33, Markus Armbruster 写道:

huang...@chinatelecom.cn writes:


From: Hyman Huang(黄勇) 

Implement dirty-rate calculation periodically based on the
dirty ring, and throttle the vCPU until it reaches the quota
dirty rate given by the user.

Introduce qmp commands set-dirty-limit/cancel-dirty-limit to
set/cancel dirty limit on vCPU.

Signed-off-by: Hyman Huang(黄勇) 
---
  cpus-common.c | 41 +
  include/hw/core/cpu.h |  9 +
  qapi/migration.json   | 43 +++
  softmmu/vl.c  |  1 +
  4 files changed, 94 insertions(+)

diff --git a/cpus-common.c b/cpus-common.c
index 6e73d3e..43b0078 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -23,6 +23,11 @@
  #include "hw/core/cpu.h"
  #include "sysemu/cpus.h"
  #include "qemu/lockable.h"
+#include "sysemu/dirtylimit.h"
+#include "sysemu/cpu-throttle.h"
+#include "sysemu/kvm.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-migration.h"
  
  static QemuMutex qemu_cpu_list_lock;

  static QemuCond exclusive_cond;
@@ -352,3 +357,39 @@ void process_queued_cpu_work(CPUState *cpu)
  qemu_mutex_unlock(&cpu->work_mutex);
  qemu_cond_broadcast(&qemu_work_cond);
  }
+
+void qmp_set_dirty_limit(int64_t idx,
+ uint64_t dirtyrate,
+ Error **errp)
+{
+if (!kvm_dirty_ring_enabled()) {
+error_setg(errp, "dirty ring not enable, needed by dirty restraint!");


"not enabled"

What is a "dirty restraint"?

Drop the exclamation point, please.  See error.h:

  * The resulting message should be a single phrase, with no newline or
  * trailing punctuation.

What about "setting a dirty page limit requires ...".

Ok, sound good



+return;
+}
+
+dirtylimit_calc();
+dirtylimit_vcpu(idx, dirtyrate);
+}
+
+void qmp_cancel_dirty_limit(int64_t idx,
+Error **errp)
+{
+if (!kvm_dirty_ring_enabled()) {
+error_setg(errp, "dirty ring not enable, needed by dirty restraint!");
+return;
+}
+
+if (unlikely(!dirtylimit_cancel_vcpu(idx))) {
+dirtylimit_calc_quit();
+}
+}
+
+void dirtylimit_setup(int max_cpus)
+{
+if (!kvm_dirty_ring_enabled()) {


This crashes unless the accelerator is kvm.  Reproducer:

 $ qemu-system-x86_64 -display none -accel tcg
 Segmentation fault (core dumped)


Thanks very much for finding this issue, i'll fix it next version


+return;
+}
+
+dirtylimit_calc_state_init(max_cpus);
+dirtylimit_state_init(max_cpus);
+}
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index e948e81..11df012 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -881,6 +881,15 @@ void end_exclusive(void);
   */
  void qemu_init_vcpu(CPUState *cpu);
  
+/**

+ * dirtylimit_setup:
+ *
+ * Initializes the global state of dirtylimit calculation and
+ * dirtylimit itself. This is prepared for vCPU dirtylimit which
+ * could be triggered during vm lifecycle.
+ */
+void dirtylimit_setup(int max_cpus);
+
  #define SSTEP_ENABLE  0x1  /* Enable simulated HW single stepping */
  #define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
  #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */
diff --git a/qapi/migration.json b/qapi/migration.json
index bbfd48c..42b260e 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1850,6 +1850,49 @@
  { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
  
  ##

+# @set-dirty-limit:
+#
+# Set the upper limit of dirty page rate for the interested vCPU.


"for a vCPU"


+#
+# This command could be used to cap the vCPU memory load, which is also
+# refered as "dirty page rate". Users can use set-dirty-limit unconditionally,
+# but if one want to know which vCPU is in high memory load and which vCPU
+# should be limited, "calc-dirty-rate" with "dirty-ring" mode maybe an
+# availiable method.


I think you should mention that the command fails unless dirty ring is
enabled, and a pointer to its documentation.


Hmm, it seems that there's no documentation about the dirty ring in QEMU;
should I mention commit b4420f19 "KVM: Dirty ring support" for
dirty-ring?

+#
+# @idx: vCPU index to set dirtylimit.


Please rename to @cpu-index for consistency with query-cpus-fast.  Same
for cancel-dirty-limit below.


+#
+# @dirtyrate: upper limit for the specified vCPU's dirty page rate (MB/s)


In QMP, we separate words with hyphens, like @dirty-rate.  Please
rename.


+#
+# Since: 6.3


7.0


+#
+# Example:
+#   {"execute": "set-dirty-limit"}
+#"arguments": { "idx": 0,
+#   "dirtyrate": 200 } }
+#
+##
+{ 'command': 'set-dirty-limit',
+  'data': { 'idx': 'int', 'dirtyrate': 'uint64' } }
+
+##
+# @cancel-dirty-limit:
+#
+# Cancel the dirtylimit for the vCPU which has been set with set-dirty-limit.


"the dirty page limit"


+#
+# @idx: vCPU index to canceled the dirtylimit
+#
+# Since: 6.3


7.0


+#
+# Example:
+#   {"e

Re: SEV guest attestation

2021-11-25 Thread Dov Murik
[+cc jejb, tobin, jim, hubertus]


On 25/11/2021 9:14, Sergio Lopez wrote:
> On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
>> * Daniel P. Berrangé (berra...@redhat.com) wrote:
>>> On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
 Hi,

 We recently discussed a way for remote SEV guest attestation through QEMU.
 My initial approach was to get data needed for attestation through 
 different
 QMP commands (all of which are already available, so no changes required
 there), deriving hashes and certificate data; and collecting all of this
 into a new QMP struct (SevLaunchStart, which would include the VM's policy,
 secret, and GPA) which would need to be upstreamed into QEMU. Once this is
 provided, QEMU would then need to have support for attestation before a VM
 is started. Upon speaking to Dave about this proposal, he mentioned that
 this may not be the best approach, as some situations would render the
 attestation unavailable, such as the instance where a VM is running in a
 cloud, and a guest owner would like to perform attestation via QMP (a 
 likely
 scenario), yet a cloud provider cannot simply let anyone pass arbitrary QMP
 commands, as this could be an issue.
>>>
>>> As a general point, QMP is a low level QEMU implementation detail,
>>> which is generally expected to be consumed exclusively on the host
>>> by a privileged mgmt layer, which will in turn expose its own higher
>>> level APIs to users or other apps. I would not expect to see QMP
>>> exposed to anything outside of the privileged host layer.
>>>
>>> We also use the QAPI protocol for QEMU guest agent commmunication,
>>> however, that is a distinct service from QMP on the host. It shares
>>> most infra with QMP but has a completely diffent command set. On the
>>> host it is not consumed inside QEMU, but instead consumed by a
>>> mgmt app like libvirt. 
>>>
 So I ask, does anyone involved in QEMU's SEV implementation have any input
 on a quality way to perform guest attestation? If so, I'd be interested.
>>>
>>> I think what's missing is some clearer illustrations of how this
>>> feature is expected to be consumed in some real world application
>>> and the use cases we're trying to solve.
>>>
>>> I'd like to understand how it should fit in with common libvirt
>>> applications across the different virtualization management
>>> scenarios - eg virsh (command line),  virt-manger (local desktop
>>> GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
>>> And of course any non-traditional virt use cases that might be
>>> relevant such as Kata.
>>
>> That's still not that clear; I know Alice and Sergio have some ideas
>> (cc'd).
>> There's also some standardisation efforts (e.g. 
>> https://www.potaroo.net/ietf/html/ids-wg-rats.html 
>> and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
>> ) - that I can't claim to fully understand.
>> However, there are some themes that are emerging:
>>
>>   a) One use is to only allow a VM to access some private data once we
>> prove it's the VM we expect running in a secure/confidential system
>>   b) (a) normally involves requesting some proof from the VM and then
>> providing it some confidential data/a key if it's OK
>>   c) RATs splits the problem up:
>> 
>> https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
>> I don't fully understand the split yet, but in principal there are
>> at least a few different things:
>>
>>   d) The comms layer
>>   e) Something that validates the attestation message (i.e. the
>> signatures are valid, the hashes all add up etc)
>>   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
>> 8.4 kernel, or that's a valid kernel command line)
>>   g) Something that holds some secrets that can be handed out if e & f
>> are happy.
>>
>>   There have also been proposals (e.g. Intel HTTPA) for an attestable
>> connection after a VM is running; that's probably quite different from
>> (g) but still involves (e) & (f).
>>
>> In the simpler setups d,e,f,g probably live in one place; but it's not
>> clear where they live - for example one scenario says that your cloud
>> management layer holds some of them, another says you don't trust your
>> cloud management layer and you keep them separate.
>>
>> So I think all we're actually interested in at the moment, is (d) and
>> (e) and the way for (g) to get the secret back to the guest.
>>
>> Unfortunately the comms and the contents of them varies heavily with
>> technology; in some you're talking to the qemu/hypervisor (SEV/SEV-ES)
>> while in some you're talking to the guest after boot (SEV-SNP/TDX maybe
>> SEV-ES in some cases).

SEV-ES has pre-launch measurement and secret injection, just like SEV
(except that the measurement includes the initial states of all vcpus,
that is, their VMSAs.  BTW that means that in order to calculate

Re: [RFC PATCH v3 0/5] QMP support for cold-plugging devices

2021-11-25 Thread Damien Hedde




On 11/24/21 15:51, Markus Armbruster wrote:

Daniel P. Berrangé  writes:


On Wed, Nov 24, 2021 at 02:50:11PM +0100, Markus Armbruster wrote:

Damien Hedde  writes:


The biggest difference is the fw_cfg option I think: it is related
with the rom_set_order_override()/rom_reset_order_override() (line 17
and 25). There is also the usb devices parts in between. I lack the
knowledge about fw_cfg/usb to tell if it is important or not.

What I wanted to say is I don't know if the difference is
acceptable. If we want device_add to support all -device use cases, it
is not. In that case we need to stop either in the middle of this
function (line 15) or at the end (better with your sketch in mind).

Note that rom_set_order_override()/rom_reset_order_override() only
set/reset a switch variable that changes how fw_cfg files are
sorted. It could be integrated into device_add code (and removed from
the above function) without changing the behavior.


For me, the part that puts me off is interleaving CLI and QMP.

We process the CLI in an order few people understand, and only while
staring at the code.  That's bad.

Injecting QMP at certain points in that sequence can only make it worse.


Yep, I share your unease here.. especially wrt this quoted text
from later:

   > >> Users can do as much or as little with the CLI as they want.  You'd
   > >> probably want to set up a QMP monitor and no more.

I would say that is a case of overkill. It can only make our
lives harder as maintainers in the long term, if we have to
worry about such arbitrary mixing of QMP and CLI. This is
also why I'm pretty uneasy about the 'preconfig' stuff as
implemented today in general.

It is a half-way house that doesn't really give mgmt apps
what they want, which is a 100% QAPI-only config. If mgmt
apps start using preconfig, it won't make life any better
for them and will also lock QEMU maintainers into supporting
this half-way house.


Misunderstanding?  The paragraph you quoted is about this design:

 1. Start event loop
 
 2. Feed it CLI left to right.  Each option runs a handler just like each

 QMP command does.
 
 Options that read a configuration file inject the file into the feed.
 
 Options that create a monitor create it suspended.
 
 Options may advance the phase / run state, and they may require

 certain phase(s).
 
 3. When we're done with CLI, resume any monitors we created.
 
 4. Monitors now feed commands to the event loop.  Commands may advance

 the phase / run state, and they may require certain phase(s).
 
 Users can do as much or as little with the CLI as they want.  You'd

 probably want to set up a QMP monitor and no more.
 
 device_add becomes possible at a certain state of the phase / run state

 machine.  It changes from cold to hot plug at a certain later state.

Certainly enables 100% QAPI-only config.  It just doesn't *force* you to
100%.  Feature.


We have a bit of a track record with QEMU of introducing
partial solutions and never quite finishing the job. There's
little strong incentive to ever finish it, if you can freely
mix both old and new style forever, and thus maintainers are
burdened forever with both.

IMHO, we should only try to support the non-mixed scenarios

   - 100% of hardware configured via CLI args
   - 100% of hardware configured via QAPI (whether live in
 QMP, or fed in via a QAPI based JSON/YAML config file)

so that we only have two clear cases we need to worry about
dealing with.

Focus our efforts 100% of the 100% QAPI scenario and don't
divert energy into short term hybrid solutions.


The design above pretty much requires 100% QAPI.

It's based on the notion that there's no real difference between a CLI
option and a QMP command that doesn't return a value.  So treat the CLI
more like a monitor.

For sanity's sake, make it not race with the other monitors by starting
them suspended.

This design is arguably *less* hybrid than one that treats a (severely
dumbed down) CLI unlike a monitor.



It seems there is a big gap from where we are now to a full QAPI startup 
support.
Could we adopt a plan which would allow us to progress from where we are 
to full QAPI support in small steps ?


For example, the following:

 1. CLI/QMP interleaving seems to be big issue right now. We could 
solve this by making -preconfig stop only after all CLI options are 
"consumed".
For example if you give -preconfig and some -device, qemu won't stop 
before having created the devices.


Meaning you would do
$qemu [out-of-order CLI with -preconfig] then jump into the monitors.

Depending on your use case, you would have to give a few CLI options so 
that -preconfig stops early enough.


 2. Then we can enable QMP commands one by one corresponding to 
unsupported and needed/cleaned up CLI options. They will check and/or 
advance the phase/runstate.


Basically this would mean we have to first c

Re: [PATCH] linux-user: move target_signal.h generic definitions to generic/signal.h

2021-11-25 Thread gaosong

Hi,
On 2021/11/25 下午6:08, WANG Xuerui wrote:

+
+#define TARGET_MINSIGSTKSZ 2048

While all the architectures you de-duplicated here have
TARGET_MINSIGSTACKSZ as 2048, some others specify a different value
(mostly 4096, e.g. alpha), as can be seen in your next patch (which
should belong to this series, btw).

Sure, I'll add a patch to delete TARGET_SIGSTKSZ.

  Do you mean to change semantics
here? Or you might have to allow arches to override this value.


For mips64, sparc, alpha, mips, and hppa, the signal definitions are defined
in /linux-user/XXX/target_signal.h,
but their target_signal.h files don't include generic/signal.h; they don't
use generic/signal.h at all.
It's hard to move some of their generic definitions to generic/signal.h,
because their definitions are too different from the generic ones.


Thanks
Song Gao







Re: [RFC PATCH v3 0/5] QMP support for cold-plugging devices

2021-11-25 Thread Daniel P . Berrangé
On Wed, Nov 24, 2021 at 03:51:23PM +0100, Markus Armbruster wrote:
> Daniel P. Berrangé  writes:
> 
> > On Wed, Nov 24, 2021 at 02:50:11PM +0100, Markus Armbruster wrote:
> >> Damien Hedde  writes:
> >> 
> >> > The biggest difference is the fw_cfg option I think: it is related
> >> > with the rom_set_order_override()/rom_reset_order_override() (line 17
> >> > and 25). There is also the usb devices parts in between. I lack the 
> >> > knowledge about fw_cfg/usb to tell if it is important or not.
> >> >
> >> > What I wanted to say is I don't know if the difference is
> >> > acceptable. If we want device_add to support all -device use cases, it
> >> > is not. In that case we need to stop either in the middle of this
> >> > function (line 15) or at the end (better with your sketch in mind).
> >> >
> >> > Note that rom_set_order_override()/rom_reset_order_override() only
> >> > set/reset a switch variable that changes how fw_cfg files are
> >> > sorted. It could be integrated into device_add code (and removed from
> >> > the above function) without changing the behavior.
> >> 
> >> For me, the part that puts me off is interleaving CLI and QMP.
> >> 
> >> We process the CLI in an order few people understand, and only while
> >> staring at the code.  That's bad.
> >> 
> >> Injecting QMP at certain points in that sequence can only make it worse.
> >
> > Yep, I share your unease here.. especially wrt this quoted text
> > from later:
> >
> >   > >> Users can do as much or as little with the CLI as they want.  You'd
> >   > >> probably want to set up a QMP monitor and no more.
> >
> > I would say that is a case of overkill. It can only make our
> > lives harder as maintainers in the long term, if we have to
> > worry about such arbitrary mixing of QMP and CLI. This is
> > also why I'm pretty uneasy about the 'preconfig' stuff as
> > implemented today in general.
> >
> > It is a half-way house that doesn't really give mgmt apps
> > what they want, which is a 100% QAPI-only config. If mgmt
> > apps start using preconfig, it won't make life any better
> > for them and will also lock QEMU maintainers into supporting
> > this half-way house.
> 
> Misunderstanding?  The paragraph you quoted is about this design:
> 
> 1. Start event loop
> 
> 2. Feed it CLI left to right.  Each option runs a handler just like each
> QMP command does.
> 
> Options that read a configuration file inject the file into the feed.
> 
> Options that create a monitor create it suspended.
> 
> Options may advance the phase / run state, and they may require
> certain phase(s).
> 
> 3. When we're done with CLI, resume any monitors we created.
> 
> 4. Monitors now feed commands to the event loop.  Commands may advance
> the phase / run state, and they may require certain phase(s).
> 
> Users can do as much or as little with the CLI as they want.  You'd
> probably want to set up a QMP monitor and no more.
> 
> device_add becomes possible at a certain state of the phase / run state
> machine.  It changes from cold to hot plug at a certain later state.
> 
> Certainly enables 100% QAPI-only config.  It just doesn't *force* you to
> 100%.  Feature.

This is far away from how our CLI handling works today, since we don't
require left-to-right args. Converting existing binaries to this
approach is going to be hard with a high risk of regressions. This
is especiall true if we try todo an incremental conversion, of
different pieces of the CLI and allow arbitrary mixing of CLI and
QMP throughout.

IMHO a pre-requisite for changing CLI arg processing ordering, is
a fresh binary that leaves QemuOpts behind for its CLI, so any
usage is consistent with QAPI. 

> > We have a bit of a track record with QEMU of introducing
> > partial solutions and never quite finishing the job. There's
> > little strong incentive to ever finish it, if you can freely
> > mix both old and new style forever, and thus maintainers are
> > burdened forever with both.
> >
> > IMHO, we should only try to support the non-mixed scenarios
> >
> >   - 100% of hardware configured via CLI args
> >   - 100% of hardware configured via QAPI (whether live in
> > QMP, or fed in via a QAPI based JSON/YAML config file)
> >
> > so that we only have two clear cases we need to worry about
> > dealing with.
> >
> > Focus our efforts 100% of the 100% QAPI scenario and don't
> > divert energy into short term hybrid solutions.
> 
> The design above pretty much requires 100% QAPI.
> 
> It's based on the notion that there's no real difference between a CLI
> option and a QMP command that doesn't return a value.  So treat the CLI
> more like a monitor.
> 
> For sanity's sake, make it not race with the other monitors by starting
> them suspended.
> 
> This design is arguably *less* hybrid than one that treats a (severely
> dumbed down) CLI unlike a monitor.

Yes, my concern is more about how that gets in

Re: Questions about losing the write lock of raw-format disks after migration

2021-11-25 Thread Hanna Reitz

On 24.11.21 13:56, Peng Liang via wrote:

Hi folks,

When we test migration with raw-format disk, we found that the QEMU
process in the dst will lose the write lock after migration.  However,
the QEMU process in the dst will still hold the write lock for
qcow2-format disk.

After reading some block layer's code, I found that the first
blk_set_perm in blk_root_activate will set blk->shared_perm to
BLK_PERM_ALL (disable all shared permissions?).  Then in
blk_vm_state_changed, blk_set_perm will set shared_perm to
blk->shared_perm, which is BLK_PERM_ALL.  And it makes
raw_handle_perm_lock not to get the write lock.

So I try the following patch and it will fix the problem:
diff --git a/block/block-backend.c b/block/block-backend.c
index 12ef80ea17..96518fd1f0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -197,13 +197,6 @@ static void blk_root_activate(BdrvChild *child,
Error **errp)

  blk->disable_perm = false;

-blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-blk->disable_perm = true;
-return;
-}
-
  if (runstate_check(RUN_STATE_INMIGRATE)) {
  /* Activation can happen when migration process is still
active, for
   * example when nbd_server_add is called during non-shared storage

I'm new to the block layer and I'm not sure that it's a right fix to the
problem.  Any idea about the problem and the patch?


Hi Peng,

Thanks for your report!  I can reproduce this problem.

It appears to me the problem is that blk_set_perm(), well, sets 
blk->perm and blk->shared_perm.  So by once calling it with 
BLK_PERM_ALL, we override blk->shared_perm to from then on be 
BLK_PERM_ALL, even though the guest device has not set that at all. We 
later call blk_set_perm(blk->blk_perm, blk->shared_perm) (in 
blk_vm_state_changed()), however, blk->shared_perm is now BLK_PERM_ALL, 
so this is a no-op.  That means that any restrictions the guest device 
has imposed (like the default share-rw=off) is not reflected in the 
block device’s permissions.


This is not apparent with qcow2, because the qcow2 format imposes its 
own restrictions in addition to the guest device.


I think the right way to fix this is to save blk->shared_perm somewhere 
and then restore it after the blk_set_perm(BLK_PERM_ALL) call.  I’ll 
send a patch (with a test case).


Hanna




unable to execute QEMU command 'qom-get': Property 'sgx-epc.unavailable-features' not found

2021-11-25 Thread Yang Zhong
Hello Paolo,

Our customer used the Libvirt XML to start a SGX VM, but failed.

libvirt.libvirtError: internal error: unable to execute QEMU command 'qom-get': 
Property 'sgx-epc.unavailable-features' not found

The XML file,







  

The new compound property command should be located in the /machine path,
which is different from the old command '-sgx-epc id=epc1,memdev=mem1'.

I also tried this from Qemu monitor tool, 
(qemu) qom-list /machine
type (string)
kernel (string)
..
sgx-epc (SgxEPC)
..
sgx-epc[0] (child)
..

We can find sgx-epc from /machine list.

I am not familiar with Libvirt side, would you please suggest how to implement
this compound command in the XML file?  thanks a lot!

Regards,

Yang  





Re: SEV guest attestation

2021-11-25 Thread Dr. David Alan Gilbert
* Sergio Lopez (s...@redhat.com) wrote:
> On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > > On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
> > > > Hi,
> > > > 
> > > > We recently discussed a way for remote SEV guest attestation through 
> > > > QEMU.
> > > > My initial approach was to get data needed for attestation through 
> > > > different
> > > > QMP commands (all of which are already available, so no changes required
> > > > there), deriving hashes and certificate data; and collecting all of this
> > > > into a new QMP struct (SevLaunchStart, which would include the VM's 
> > > > policy,
> > > > secret, and GPA) which would need to be upstreamed into QEMU. Once this 
> > > > is
> > > > provided, QEMU would then need to have support for attestation before a 
> > > > VM
> > > > is started. Upon speaking to Dave about this proposal, he mentioned that
> > > > this may not be the best approach, as some situations would render the
> > > > attestation unavailable, such as the instance where a VM is running in a
> > > > cloud, and a guest owner would like to perform attestation via QMP (a 
> > > > likely
> > > > scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
> > > > QMP
> > > > commands, as this could be an issue.
> > > 
> > > As a general point, QMP is a low level QEMU implementation detail,
> > > which is generally expected to be consumed exclusively on the host
> > > by a privileged mgmt layer, which will in turn expose its own higher
> > > level APIs to users or other apps. I would not expect to see QMP
> > > exposed to anything outside of the privileged host layer.
> > > 
> > > We also use the QAPI protocol for QEMU guest agent commmunication,
> > > however, that is a distinct service from QMP on the host. It shares
> > > most infra with QMP but has a completely diffent command set. On the
> > > host it is not consumed inside QEMU, but instead consumed by a
> > > mgmt app like libvirt. 
> > > 
> > > > So I ask, does anyone involved in QEMU's SEV implementation have any 
> > > > input
> > > > on a quality way to perform guest attestation? If so, I'd be interested.
> > > 
> > > I think what's missing is some clearer illustrations of how this
> > > feature is expected to be consumed in some real world application
> > > and the use cases we're trying to solve.
> > > 
> > > I'd like to understand how it should fit in with common libvirt
> > > applications across the different virtualization management
> > > scenarios - eg virsh (command line),  virt-manger (local desktop
> > > GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> > > And of course any non-traditional virt use cases that might be
> > > relevant such as Kata.
> > 
> > That's still not that clear; I know Alice and Sergio have some ideas
> > (cc'd).
> > There's also some standardisation efforts (e.g. 
> > https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> > and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> > ) - that I can't claim to fully understand.
> > However, there are some themes that are emerging:
> > 
> >   a) One use is to only allow a VM to access some private data once we
> > prove it's the VM we expect running in a secure/confidential system
> >   b) (a) normally involves requesting some proof from the VM and then
> > providing it some confidential data/a key if it's OK
> >   c) RATs splits the problem up:
> > 
> > https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
> > I don't fully understand the split yet, but in principal there are
> > at least a few different things:
> > 
> >   d) The comms layer
> >   e) Something that validates the attestation message (i.e. the
> > signatures are valid, the hashes all add up etc)
> >   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
> > 8.4 kernel, or that's a valid kernel command line)
> >   g) Something that holds some secrets that can be handed out if e & f
> > are happy.
> > 
> >   There have also been proposals (e.g. Intel HTTPA) for an attestable
> > connection after a VM is running; that's probably quite different from
> > (g) but still involves (e) & (f).
> > 
> > In the simpler setups d,e,f,g probably live in one place; but it's not
> > clear where they live - for example one scenario says that your cloud
> > management layer holds some of them, another says you don't trust your
> > cloud management layer and you keep them separate.
> > 
> > So I think all we're actually interested in at the moment, is (d) and
> > (e) and the way for (g) to get the secret back to the guest.
> > 
> > Unfortunately the comms and the contents of them varies heavily with
> > technology; in some you're talking to the qemu/hypervisor (SEV/SEV-ES)
> > while in some you're talking to the guest after boot (SEV-SNP/TDX maybe
> > SEV-ES in some cases).
> > 
> > So my expectat

Re: SEV guest attestation

2021-11-25 Thread Daniel P . Berrangé
On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
> > > Hi,
> > > 
> > > We recently discussed a way for remote SEV guest attestation through QEMU.
> > > My initial approach was to get data needed for attestation through 
> > > different
> > > QMP commands (all of which are already available, so no changes required
> > > there), deriving hashes and certificate data; and collecting all of this
> > > into a new QMP struct (SevLaunchStart, which would include the VM's 
> > > policy,
> > > secret, and GPA) which would need to be upstreamed into QEMU. Once this is
> > > provided, QEMU would then need to have support for attestation before a VM
> > > is started. Upon speaking to Dave about this proposal, he mentioned that
> > > this may not be the best approach, as some situations would render the
> > > attestation unavailable, such as the instance where a VM is running in a
> > > cloud, and a guest owner would like to perform attestation via QMP (a 
> > > likely
> > > scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
> > > QMP
> > > commands, as this could be an issue.
> > 
> > As a general point, QMP is a low level QEMU implementation detail,
> > which is generally expected to be consumed exclusively on the host
> > by a privileged mgmt layer, which will in turn expose its own higher
> > level APIs to users or other apps. I would not expect to see QMP
> > exposed to anything outside of the privileged host layer.
> > 
> > We also use the QAPI protocol for QEMU guest agent commmunication,
> > however, that is a distinct service from QMP on the host. It shares
> > most infra with QMP but has a completely diffent command set. On the
> > host it is not consumed inside QEMU, but instead consumed by a
> > mgmt app like libvirt. 
> > 
> > > So I ask, does anyone involved in QEMU's SEV implementation have any input
> > > on a quality way to perform guest attestation? If so, I'd be interested.
> > 
> > I think what's missing is some clearer illustrations of how this
> > feature is expected to be consumed in some real world application
> > and the use cases we're trying to solve.
> > 
> > I'd like to understand how it should fit in with common libvirt
> > applications across the different virtualization management
> > scenarios - eg virsh (command line),  virt-manger (local desktop
> > GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> > And of course any non-traditional virt use cases that might be
> > relevant such as Kata.
> 
> That's still not that clear; I know Alice and Sergio have some ideas
> (cc'd).
> There's also some standardisation efforts (e.g. 
> https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> ) - that I can't claim to fully understand.
> However, there are some themes that are emerging:
> 
>   a) One use is to only allow a VM to access some private data once we
> prove it's the VM we expect running in a secure/confidential system
>   b) (a) normally involves requesting some proof from the VM and then
> providing it some confidential data/a key if it's OK

I guess I'm wondering what the threat we're protecting against is,
and / or which pieces of the stack we can trust ?

eg, if the host has 2 VMs running, we verify the 1st and provide
its confidental data back to the host, what stops the host giving
that data to the 2nd non-verified VM?

Presumably the data has to be encrypted with a key that is uniquely
tied to this specific boot attempt of the verified VM, and not
accessible to any other VM, or to future boots of this VM ?


>   c) RATs splits the problem up:
> 
> https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
> I don't fully understand the split yet, but in principal there are
> at least a few different things:
> 
>   d) The comms layer
>   e) Something that validates the attestation message (i.e. the
> signatures are valid, the hashes all add up etc)
>   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
> 8.4 kernel, or that's a valid kernel command line)
>   g) Something that holds some secrets that can be handed out if e & f
> are happy.
> 
>   There have also been proposals (e.g. Intel HTTPA) for an attestable
> connection after a VM is running; that's probably quite different from
> (g) but still involves (e) & (f).
> 
> In the simpler setups d,e,f,g probably live in one place; but it's not
> clear where they live - for example one scenario says that your cloud
> management layer holds some of them, another says you don't trust your
> cloud management layer and you keep them separate.

Yep, again I'm wondering what the specific threats are that we're
trying to mitigate. Whether we trust the cloud mgmt APIs, but don't
trust the compute hosts, or whether we trust ne

Re: SEV guest attestation

2021-11-25 Thread Daniel P . Berrangé
On Thu, Nov 25, 2021 at 08:14:28AM +0100, Sergio Lopez wrote:
> On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > > On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
> > > > Hi,
> > > > 
> > > > We recently discussed a way for remote SEV guest attestation through 
> > > > QEMU.
> > > > My initial approach was to get data needed for attestation through 
> > > > different
> > > > QMP commands (all of which are already available, so no changes required
> > > > there), deriving hashes and certificate data; and collecting all of this
> > > > into a new QMP struct (SevLaunchStart, which would include the VM's 
> > > > policy,
> > > > secret, and GPA) which would need to be upstreamed into QEMU. Once this 
> > > > is
> > > > provided, QEMU would then need to have support for attestation before a 
> > > > VM
> > > > is started. Upon speaking to Dave about this proposal, he mentioned that
> > > > this may not be the best approach, as some situations would render the
> > > > attestation unavailable, such as the instance where a VM is running in a
> > > > cloud, and a guest owner would like to perform attestation via QMP (a 
> > > > likely
> > > > scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
> > > > QMP
> > > > commands, as this could be an issue.
> > > 
> > > As a general point, QMP is a low level QEMU implementation detail,
> > > which is generally expected to be consumed exclusively on the host
> > > by a privileged mgmt layer, which will in turn expose its own higher
> > > level APIs to users or other apps. I would not expect to see QMP
> > > exposed to anything outside of the privileged host layer.
> > > 
> > > We also use the QAPI protocol for QEMU guest agent communication,
> > > however, that is a distinct service from QMP on the host. It shares
> > > most infra with QMP but has a completely different command set. On the
> > > host it is not consumed inside QEMU, but instead consumed by a
> > > mgmt app like libvirt. 
> > > 
> > > > So I ask, does anyone involved in QEMU's SEV implementation have any 
> > > > input
> > > > on a quality way to perform guest attestation? If so, I'd be interested.
> > > 
> > > I think what's missing is some clearer illustrations of how this
> > > feature is expected to be consumed in some real world application
> > > and the use cases we're trying to solve.
> > > 
> > > I'd like to understand how it should fit in with common libvirt
> > > applications across the different virtualization management
> > > scenarios - eg virsh (command line),  virt-manager (local desktop
> > > GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> > > And of course any non-traditional virt use cases that might be
> > > relevant such as Kata.
> > 
> > That's still not that clear; I know Alice and Sergio have some ideas
> > (cc'd).
> > There's also some standardisation efforts (e.g. 
> > https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> > and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> > ) - that I can't claim to fully understand.
> > However, there are some themes that are emerging:
> > 
> >   a) One use is to only allow a VM to access some private data once we
> > prove it's the VM we expect running in a secure/confidential system
> >   b) (a) normally involves requesting some proof from the VM and then
> > providing it some confidential data/a key if it's OK
> >   c) RATs splits the problem up:
> > 
> > https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
> > I don't fully understand the split yet, but in principle there are
> > at least a few different things:
> > 
> >   d) The comms layer
> >   e) Something that validates the attestation message (i.e. the
> > signatures are valid, the hashes all add up etc)
> >   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
> > 8.4 kernel, or that's a valid kernel command line)
> >   g) Something that holds some secrets that can be handed out if e & f
> > are happy.
> > 
> >   There have also been proposals (e.g. Intel HTTPA) for an attestable
> > connection after a VM is running; that's probably quite different from
> > (g) but still involves (e) & (f).
> > 
> > In the simpler setups d,e,f,g probably live in one place; but it's not
> > clear where they live - for example one scenario says that your cloud
> > management layer holds some of them, another says you don't trust your
> > cloud management layer and you keep them separate.
> > 
> > So I think all we're actually interested in at the moment, is (d) and
> > (e) and the way for (g) to get the secret back to the guest.
> > 
> > Unfortunately the comms and the contents of them varies heavily with
> > technology; in some you're talking to the qemu/hypervisor (SEV/SEV-ES)
> > while in some you're talking to the guest after boot (SEV-SNP/TDX maybe
> > SEV-ES in some cases).
>

Re: SEV guest attestation

2021-11-25 Thread Daniel P . Berrangé
On Thu, Nov 25, 2021 at 02:44:51PM +0200, Dov Murik wrote:
> [+cc jejb, tobin, jim, hubertus]
> 
> 
> On 25/11/2021 9:14, Sergio Lopez wrote:
> > On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> >> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> >>> On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
>  Hi,
> 
>  We recently discussed a way for remote SEV guest attestation through 
>  QEMU.
>  My initial approach was to get data needed for attestation through 
>  different
>  QMP commands (all of which are already available, so no changes required
>  there), deriving hashes and certificate data; and collecting all of this
>  into a new QMP struct (SevLaunchStart, which would include the VM's 
>  policy,
>  secret, and GPA) which would need to be upstreamed into QEMU. Once this 
>  is
>  provided, QEMU would then need to have support for attestation before a 
>  VM
>  is started. Upon speaking to Dave about this proposal, he mentioned that
>  this may not be the best approach, as some situations would render the
>  attestation unavailable, such as the instance where a VM is running in a
>  cloud, and a guest owner would like to perform attestation via QMP (a 
>  likely
>  scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
>  QMP
>  commands, as this could be an issue.
> >>>
> >>> As a general point, QMP is a low level QEMU implementation detail,
> >>> which is generally expected to be consumed exclusively on the host
> >>> by a privileged mgmt layer, which will in turn expose its own higher
> >>> level APIs to users or other apps. I would not expect to see QMP
> >>> exposed to anything outside of the privileged host layer.
> >>>
> >>> We also use the QAPI protocol for QEMU guest agent communication,
> >>> however, that is a distinct service from QMP on the host. It shares
> >>> most infra with QMP but has a completely different command set. On the
> >>> host it is not consumed inside QEMU, but instead consumed by a
> >>> mgmt app like libvirt. 
> >>>
>  So I ask, does anyone involved in QEMU's SEV implementation have any 
>  input
>  on a quality way to perform guest attestation? If so, I'd be interested.
> >>>
> >>> I think what's missing is some clearer illustrations of how this
> >>> feature is expected to be consumed in some real world application
> >>> and the use cases we're trying to solve.
> >>>
> >>> I'd like to understand how it should fit in with common libvirt
> >>> applications across the different virtualization management
> >>> scenarios - eg virsh (command line),  virt-manager (local desktop
> >>> GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> >>> And of course any non-traditional virt use cases that might be
> >>> relevant such as Kata.
> >>
> >> That's still not that clear; I know Alice and Sergio have some ideas
> >> (cc'd).
> >> There's also some standardisation efforts (e.g. 
> >> https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> >> and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> >> ) - that I can't claim to fully understand.
> >> However, there are some themes that are emerging:
> >>
> >>   a) One use is to only allow a VM to access some private data once we
> >> prove it's the VM we expect running in a secure/confidential system
> >>   b) (a) normally involves requesting some proof from the VM and then
> >> providing it some confidential data/a key if it's OK
> >>   c) RATs splits the problem up:
> >> 
> >> https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
> >> I don't fully understand the split yet, but in principle there are
> >> at least a few different things:
> >>
> >>   d) The comms layer
> >>   e) Something that validates the attestation message (i.e. the
> >> signatures are valid, the hashes all add up etc)
> >>   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
> >> 8.4 kernel, or that's a valid kernel command line)
> >>   g) Something that holds some secrets that can be handed out if e & f
> >> are happy.
> >>
> >>   There have also been proposals (e.g. Intel HTTPA) for an attestable
> >> connection after a VM is running; that's probably quite different from
> >> (g) but still involves (e) & (f).
> >>
> >> In the simpler setups d,e,f,g probably live in one place; but it's not
> >> clear where they live - for example one scenario says that your cloud
> >> management layer holds some of them, another says you don't trust your
> >> cloud management layer and you keep them separate.
> >>
> >> So I think all we're actually interested in at the moment, is (d) and
> >> (e) and the way for (g) to get the secret back to the guest.
> >>
> >> Unfortunately the comms and the contents of them varies heavily with
> >> technology; in some you're talking to the qemu/hypervisor (SEV/SEV-ES)
> >> while in some

Re: SEV guest attestation

2021-11-25 Thread Dov Murik



On 25/11/2021 15:27, Daniel P. Berrangé wrote:
> On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
>> * Daniel P. Berrangé (berra...@redhat.com) wrote:
>>> On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
 Hi,

 We recently discussed a way for remote SEV guest attestation through QEMU.
 My initial approach was to get data needed for attestation through 
 different
 QMP commands (all of which are already available, so no changes required
 there), deriving hashes and certificate data; and collecting all of this
 into a new QMP struct (SevLaunchStart, which would include the VM's policy,
 secret, and GPA) which would need to be upstreamed into QEMU. Once this is
 provided, QEMU would then need to have support for attestation before a VM
 is started. Upon speaking to Dave about this proposal, he mentioned that
 this may not be the best approach, as some situations would render the
 attestation unavailable, such as the instance where a VM is running in a
 cloud, and a guest owner would like to perform attestation via QMP (a 
 likely
 scenario), yet a cloud provider cannot simply let anyone pass arbitrary QMP
 commands, as this could be an issue.
>>>
>>> As a general point, QMP is a low level QEMU implementation detail,
>>> which is generally expected to be consumed exclusively on the host
>>> by a privileged mgmt layer, which will in turn expose its own higher
>>> level APIs to users or other apps. I would not expect to see QMP
>>> exposed to anything outside of the privileged host layer.
>>>
>>> We also use the QAPI protocol for QEMU guest agent communication,
>>> however, that is a distinct service from QMP on the host. It shares
>>> most infra with QMP but has a completely different command set. On the
>>> host it is not consumed inside QEMU, but instead consumed by a
>>> mgmt app like libvirt. 
>>>
 So I ask, does anyone involved in QEMU's SEV implementation have any input
 on a quality way to perform guest attestation? If so, I'd be interested.
>>>
>>> I think what's missing is some clearer illustrations of how this
>>> feature is expected to be consumed in some real world application
>>> and the use cases we're trying to solve.
>>>
>>> I'd like to understand how it should fit in with common libvirt
>>> applications across the different virtualization management
>>> scenarios - eg virsh (command line),  virt-manager (local desktop
>>> GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
>>> And of course any non-traditional virt use cases that might be
>>> relevant such as Kata.
>>
>> That's still not that clear; I know Alice and Sergio have some ideas
>> (cc'd).
>> There's also some standardisation efforts (e.g. 
>> https://www.potaroo.net/ietf/html/ids-wg-rats.html 
>> and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
>> ) - that I can't claim to fully understand.
>> However, there are some themes that are emerging:
>>
>>   a) One use is to only allow a VM to access some private data once we
>> prove it's the VM we expect running in a secure/confidential system
>>   b) (a) normally involves requesting some proof from the VM and then
>> providing it some confidential data/a key if it's OK
> 
> I guess I'm wondering what the threat we're protecting against is,
> and / or which pieces of the stack we can trust ?
> 
> eg, if the host has 2 VMs running, we verify the 1st and provide
> its confidental data back to the host, what stops the host giving
> that data to the 2nd non-verified VM ? 

The host can't read the injected secret: It is encrypted with a key that
is available only to the PSP.  The PSP receives it and writes it in a
guest-encrypted memory (which the host also cannot read; for the guest
it's a simple memory access with C-bit=1).  So it's a per-vm-invocation
secret.


> 
> Presumably the data has to be encrypted with a key that is uniquely
> tied to this specific boot attempt of the verified VM, and not
> accessible to any other VM, or to future boots of this VM ?

Yes, launch blob, which (if I recall correctly) the Guest Owner should
generate and give to the Cloud Provider so it can start a VM with it
(this is one of the options on the sev-guest object).

-Dov


> 
> 
>>   c) RATs splits the problem up:
>> 
>> https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
>> I don't fully understand the split yet, but in principle there are
>> at least a few different things:
>>
>>   d) The comms layer
>>   e) Something that validates the attestation message (i.e. the
>> signatures are valid, the hashes all add up etc)
>>   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
>> 8.4 kernel, or that's a valid kernel command line)
>>   g) Something that holds some secrets that can be handed out if e & f
>> are happy.
>>
>>   There have also been proposals (e.g. Intel HTTPA) for an attestable
>

Re: SEV guest attestation

2021-11-25 Thread Daniel P . Berrangé
On Thu, Nov 25, 2021 at 08:14:28AM +0100, Sergio Lopez wrote:
> For SEV-SNP, this is pretty much the end of the story, because the
> attestation exchange is driven by an agent inside the guest. Well,
> there's also the need to have in the VM a well-known vNIC bridged to a
> network that's routed to the Attestation Server, that everyone seems
> to consider a given, but to me, from a CSP perspective, looks like
> quite a headache. In fact, I'd go as far as to suggest this
> communication should happen through an alternative channel, such as
> vsock, having a proxy on the Host, but I guess that depends on the CSP
> infrastructure.

Allowing network connections from inside the VM, to any kind
of host side mgmt LAN services is a big no for some cloud hosts.

They usually desire for any guest network connectivity to be
associated with a VLAN/network segment that is strictly isolated
from any host mgmt LAN.

OpenStack provides a virtual CDROM for injecting cloud-init
metadata as an alternative to the network based metadata REST
service, since the latter often isn't deployed.

Similarly for virtual filesystems, we've designed virtiofs,
rather than relying on a 2nd NIC combined with NFS.

We cannot assume availability of a real network device for the
attestation. If one does exist fine, but there needs to be an
alternative option that can be used.


On a slightly different topic - if the attestation is driven
from an agent inside the guest, this seems to imply we let the
guest vCPUs start before attestation is done. Contrary to
the SEV/SEV-ES where we seem to be wanting vCPUs to remain
in the stopped state until attestation is complete & secrets
provided.  If the vCPUs are started, is there some mechanism
to restrict what can be done  before attestation is complete?

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH 0/2] block-backend: Retain permissions after migration

2021-11-25 Thread Hanna Reitz
Hi,

Peng Liang has reported an issue regarding migration of raw images here:
https://lists.nongnu.org/archive/html/qemu-block/2021-11/msg00673.html

It turns out that after migrating, all permissions are shared when they
weren’t before.  The cause of the problem is that we deliberately delay
restricting the shared permissions until migration is really done (until
the runstate is no longer INMIGRATE) and first share all permissions;
but this causes us to lose the original shared permission mask and
overwrites it with BLK_PERM_ALL, so once we do try to restrict the
shared permissions, we only again share them all.

Fix this by saving the set of shared permissions through the first
blk_perm_set() call that shares all; and add a regression test.


I don’t believe we have to fix this in 6.2, because I think this bug has
existed for four years now.  (I.e. it isn’t critical, and it’s no
regression.)


Hanna Reitz (2):
  block-backend: Retain permissions after migration
  iotests/migration-permissions: New test

 block/block-backend.c |  11 ++
 .../qemu-iotests/tests/migration-permissions  | 101 ++
 .../tests/migration-permissions.out   |   5 +
 3 files changed, 117 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/migration-permissions
 create mode 100644 tests/qemu-iotests/tests/migration-permissions.out

-- 
2.33.1




[PATCH 2/2] iotests/migration-permissions: New test

2021-11-25 Thread Hanna Reitz
This test checks that a raw image in use by a virtio-blk device does not
share the WRITE permission both before and after migration.

Signed-off-by: Hanna Reitz 
---
 .../qemu-iotests/tests/migration-permissions  | 101 ++
 .../tests/migration-permissions.out   |   5 +
 2 files changed, 106 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/migration-permissions
 create mode 100644 tests/qemu-iotests/tests/migration-permissions.out

diff --git a/tests/qemu-iotests/tests/migration-permissions 
b/tests/qemu-iotests/tests/migration-permissions
new file mode 100755
index 00..6be02581c7
--- /dev/null
+++ b/tests/qemu-iotests/tests/migration-permissions
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+# group: migration
+#
+# Copyright (C) 2021 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import iotests
+from iotests import imgfmt, qemu_img_create, qemu_io
+
+
+test_img = os.path.join(iotests.test_dir, 'test.img')
+mig_sock = os.path.join(iotests.sock_dir, 'mig.sock')
+
+
+class TestMigrationPermissions(iotests.QMPTestCase):
+def setUp(self):
+qemu_img_create('-f', imgfmt, test_img, '1M')
+
+# Set up two VMs (source and destination) accessing the same raw
+# image file with a virtio-blk device; prepare the destination for
+# migration with .add_incoming() and enable migration events
+vms = [None, None]
+for i in range(2):
+vms[i] = iotests.VM(path_suffix=f'{i}')
+vms[i].add_blockdev(f'file,node-name=prot,filename={test_img}')
+vms[i].add_blockdev(f'{imgfmt},node-name=fmt,file=prot')
+vms[i].add_device('virtio-blk,drive=fmt')
+
+if i == 1:
+vms[i].add_incoming(f'unix:{mig_sock}')
+
+vms[i].launch()
+
+result = vms[i].qmp('migrate-set-capabilities',
+capabilities=[
+{'capability': 'events', 'state': True}
+])
+self.assert_qmp(result, 'return', {})
+
+self.vm_s = vms[0]
+self.vm_d = vms[1]
+
+def tearDown(self):
+self.vm_s.shutdown()
+self.vm_d.shutdown()
+try:
+os.remove(mig_sock)
+except FileNotFoundError:
+pass
+os.remove(test_img)
+
+# Migrate an image in use by a virtio-blk device to another VM and
+# verify that the WRITE permission is unshared both before and after
+# migration
+def test_post_migration_permissions(self):
+# Try to access the image R/W, which should fail because virtio-blk
+# has not been configured with share-rw=on
+log = qemu_io('-f', imgfmt, '-c', 'quit', test_img)
+if not log.strip():
+print('ERROR (pre-migration): qemu-io should not be able to '
+  'access this image, but it reported no error')
+else:
+# This is the expected output
+assert 'Is another process using the image' in log
+
+# Now migrate the VM
+self.vm_s.qmp('migrate', uri=f'unix:{mig_sock}')
+assert self.vm_s.wait_migration(None)
+assert self.vm_d.wait_migration(None)
+
+# Try the same qemu-io access again, verifying that the WRITE
+# permission remains unshared
+log = qemu_io('-f', imgfmt, '-c', 'quit', test_img)
+if not log.strip():
+print('ERROR (post-migration): qemu-io should not be able to '
+  'access this image, but it reported no error')
+else:
+# This is the expected output
+assert 'Is another process using the image' in log
+
+
+if __name__ == '__main__':
+# Only works with raw images because we are testing the
+# BlockBackend permissions; image format drivers may additionally
+# unshare permissions and thus tamper with the result
+iotests.main(supported_fmts=['raw'],
+ supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/migration-permissions.out 
b/tests/qemu-iotests/tests/migration-permissions.out
new file mode 100644
index 00..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/tests/migration-permissions.out
@@ -0,0 +1,5 @@
+.
+--
+Ran 1 tests
+
+OK
-- 
2.33.1




[PATCH 1/2] block-backend: Retain permissions after migration

2021-11-25 Thread Hanna Reitz
After migration, the permissions the guest device wants to impose on its
BlockBackend are stored in blk->perm and blk->shared_perm.  In
blk_root_activate(), we take our permissions, but keep all shared
permissions open by calling `blk_set_perm(blk->perm, BLK_PERM_ALL)`.

Only afterwards (immediately or later, depending on the runstate) do we
restrict the shared permissions by calling
`blk_set_perm(blk->perm, blk->shared_perm)`.  Unfortunately, our first
call with shared_perm=BLK_PERM_ALL has overwritten blk->shared_perm to
be BLK_PERM_ALL, so this is a no-op and the set of shared permissions is
not restricted.

Fix this bug by saving the set of shared permissions before invoking
blk_set_perm() with BLK_PERM_ALL and restoring it afterwards.

Fixes: 5f7772c4d0cf32f4e779fcd5a69ae4dae24aeebf
   ("block-backend: Defer shared_perm tightening migration
   completion")
Reported-by: Peng Liang 
Signed-off-by: Hanna Reitz 
---
 block/block-backend.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 12ef80ea17..41e388fe1f 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -190,6 +190,7 @@ static void blk_root_activate(BdrvChild *child, Error 
**errp)
 {
 BlockBackend *blk = child->opaque;
 Error *local_err = NULL;
+uint64_t saved_shared_perm;
 
 if (!blk->disable_perm) {
 return;
@@ -197,12 +198,22 @@ static void blk_root_activate(BdrvChild *child, Error 
**errp)
 
 blk->disable_perm = false;
 
+/*
+ * blk->shared_perm contains the permissions we want to share once
+ * migration is really completely done.  For now, we need to share
+ * all; but we also need to retain blk->shared_perm, which is
+ * overwritten by a successful blk_set_perm() call.  Save it and
+ * restore it below.
+ */
+saved_shared_perm = blk->shared_perm;
+
 blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 blk->disable_perm = true;
 return;
 }
+blk->shared_perm = saved_shared_perm;
 
 if (runstate_check(RUN_STATE_INMIGRATE)) {
 /* Activation can happen when migration process is still active, for
-- 
2.33.1




Re: [PATCH v5 3/3] cpus-common: implement dirty limit on vCPU

2021-11-25 Thread Markus Armbruster
Hyman Huang  writes:

> 在 2021/11/24 23:33, Markus Armbruster 写道:
>> huang...@chinatelecom.cn writes:
>> 
>>> From: Hyman Huang(黄勇) 
>>>
>>> Implement dirtyrate calculation periodically based on
>>> dirty-ring and throttle vCPU until it reaches the quota
>>> dirtyrate given by user.
>>>
>>> Introduce qmp commands set-dirty-limit/cancel-dirty-limit to
>>> set/cancel dirty limit on vCPU.
>>>
>>> Signed-off-by: Hyman Huang(黄勇) 

[...]

>>> diff --git a/qapi/migration.json b/qapi/migration.json
>>> index bbfd48c..42b260e 100644
>>> --- a/qapi/migration.json
>>> +++ b/qapi/migration.json
>>> @@ -1850,6 +1850,49 @@
>>>   { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
>>> ##
>>> +# @set-dirty-limit:
>>> +#
>>> +# Set the upper limit of dirty page rate for the interested vCPU.
>> "for a vCPU"
>> 
>>> +#
>>> +# This command could be used to cap the vCPU memory load, which is also
>>> +# referred to as "dirty page rate". Users can use set-dirty-limit 
>>> unconditionally,
>>> +# but if one wants to know which vCPU is in high memory load and which vCPU
>>> +# should be limited, "calc-dirty-rate" with "dirty-ring" mode may be an
>>> +# available method.
>> I think you should mention that the command fails unless dirty ring
>> is
>> enabled, and a pointer to its documentation.
>> 
> Emm, it seems that there's no documentation about dirty ring in qemu,
> should i metion the commit b4420f19 "KVM: Dirty ring support" for
> dirty-ring?

I think the best you can do then is something like 'Property
"dirty-ring-size" of accelerator object "kvm" must be set.'

[...]




Re: SEV guest attestation

2021-11-25 Thread Dov Murik



On 25/11/2021 15:52, Daniel P. Berrangé wrote:
> On Thu, Nov 25, 2021 at 08:14:28AM +0100, Sergio Lopez wrote:
>> For SEV-SNP, this is pretty much the end of the story, because the
>> attestation exchange is driven by an agent inside the guest. Well,
>> there's also the need to have in the VM a well-known vNIC bridged to a
>> network that's routed to the Attestation Server, that everyone seems
>> to consider a given, but to me, from a CSP perspective, looks like
>> quite a headache. In fact, I'd go as far as to suggest this
>> communication should happen through an alternative channel, such as
>> vsock, having a proxy on the Host, but I guess that depends on the CSP
>> infrastructure.
> 
> Allowing network connections from inside the VM, to any kind
> of host side mgmt LAN services is a big no for some cloud hosts.
> 
> They usually desire for any guest network connectivity to be
> associated with a VLAN/network segment that is strictly isolated
> from any host mgmt LAN.
> 
> OpenStack provides a virtual CDROM for injecting cloud-init
> metadata as an alternative to the network based metadata REST
> service, since the latter often isn't deployed.
> 
> Similarly for virtual filesystems, we've designed virtiofs,
> rather than relying on a 2nd NIC combined with NFS.
> 
> We cannot assume availability of a real network device for the
> attestation. If one does exist fine, but there needs to be an
> alternative option that can be used.
> 
> 
> On a slightly different topic - if the attestation is driven
> from an agent inside the guest, this seems to imply we let the
> guest vCPUs start before attestation is done. Contrary to
> the SEV/SEV-ES where we seem to be wanting vCPUs to remain
> in the stopped state until attestation is complete & secrets
> provided.  If the vCPUs are started, is there some mechanism
> to restrict what can be done  before attestation is complete?

The only mechanism is to design the workload in the Guest in a way that
it can't do anything meaningful until the secret is injected, and the
Attestation Server will release the secret only if a proper attestation
report is presented.

James (cc'd) wants to move this attestation check as early as possible
--> "to restrict what can be done before attestation is complete".


-Dov



Re: SEV guest attestation

2021-11-25 Thread Dov Murik
[+cc Tom, Brijesh]

On 25/11/2021 15:42, Daniel P. Berrangé wrote:
> On Thu, Nov 25, 2021 at 02:44:51PM +0200, Dov Murik wrote:
>> [+cc jejb, tobin, jim, hubertus]
>>
>>
>> On 25/11/2021 9:14, Sergio Lopez wrote:
>>> On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
 * Daniel P. Berrangé (berra...@redhat.com) wrote:
> On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
>> Hi,
>>
>> We recently discussed a way for remote SEV guest attestation through 
>> QEMU.
>> My initial approach was to get data needed for attestation through 
>> different
>> QMP commands (all of which are already available, so no changes required
>> there), deriving hashes and certificate data; and collecting all of this
>> into a new QMP struct (SevLaunchStart, which would include the VM's 
>> policy,
>> secret, and GPA) which would need to be upstreamed into QEMU. Once this 
>> is
>> provided, QEMU would then need to have support for attestation before a 
>> VM
>> is started. Upon speaking to Dave about this proposal, he mentioned that
>> this may not be the best approach, as some situations would render the
>> attestation unavailable, such as the instance where a VM is running in a
>> cloud, and a guest owner would like to perform attestation via QMP (a 
>> likely
>> scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
>> QMP
>> commands, as this could be an issue.
>
> As a general point, QMP is a low level QEMU implementation detail,
> which is generally expected to be consumed exclusively on the host
> by a privileged mgmt layer, which will in turn expose its own higher
> level APIs to users or other apps. I would not expect to see QMP
> exposed to anything outside of the privileged host layer.
>
> We also use the QAPI protocol for QEMU guest agent communication,
> however, that is a distinct service from QMP on the host. It shares
> most infra with QMP but has a completely different command set. On the
> host it is not consumed inside QEMU, but instead consumed by a
> mgmt app like libvirt. 
>
>> So I ask, does anyone involved in QEMU's SEV implementation have any 
>> input
>> on a quality way to perform guest attestation? If so, I'd be interested.
>
> I think what's missing is some clearer illustrations of how this
> feature is expected to be consumed in some real world application
> and the use cases we're trying to solve.
>
> I'd like to understand how it should fit in with common libvirt
> applications across the different virtualization management
> scenarios - eg virsh (command line),  virt-manager (local desktop
> GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> And of course any non-traditional virt use cases that might be
> relevant such as Kata.

 That's still not that clear; I know Alice and Sergio have some ideas
 (cc'd).
 There's also some standardisation efforts (e.g. 
 https://www.potaroo.net/ietf/html/ids-wg-rats.html 
 and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
 ) - that I can't claim to fully understand.
 However, there are some themes that are emerging:

   a) One use is to only allow a VM to access some private data once we
 prove it's the VM we expect running in a secure/confidential system
   b) (a) normally involves requesting some proof from the VM and then
 providing it some confidential data/a key if it's OK
   c) RATs splits the problem up:
 
 https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
 I don't fully understand the split yet, but in principle there are
 at least a few different things:

   d) The comms layer
   e) Something that validates the attestation message (i.e. the
 signatures are valid, the hashes all add up etc)
   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
 8.4 kernel, or that's a valid kernel command line)
   g) Something that holds some secrets that can be handed out if e & f
 are happy.

   There have also been proposals (e.g. Intel HTTPA) for an attestable
 connection after a VM is running; that's probably quite different from
 (g) but still involves (e) & (f).

 In the simpler setups d,e,f,g probably live in one place; but it's not
 clear where they live - for example one scenario says that your cloud
 management layer holds some of them, another says you don't trust your
 cloud management layer and you keep them separate.

 So I think all we're actually interested in at the moment, is (d) and
 (e) and the way for (g) to get the secret back to the guest.

 Unfortunately the comms and the contents of them varies heavily with
 technology; in som

Re: SEV guest attestation

2021-11-25 Thread Daniel P . Berrangé
On Thu, Nov 25, 2021 at 03:50:46PM +0200, Dov Murik wrote:
> 
> 
> On 25/11/2021 15:27, Daniel P. Berrangé wrote:
> > On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> >> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> >>> On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
>  Hi,
> 
>  We recently discussed a way for remote SEV guest attestation through 
>  QEMU.
>  My initial approach was to get data needed for attestation through 
>  different
>  QMP commands (all of which are already available, so no changes required
>  there), deriving hashes and certificate data; and collecting all of this
>  into a new QMP struct (SevLaunchStart, which would include the VM's 
>  policy,
>  secret, and GPA) which would need to be upstreamed into QEMU. Once this 
>  is
>  provided, QEMU would then need to have support for attestation before a 
>  VM
>  is started. Upon speaking to Dave about this proposal, he mentioned that
>  this may not be the best approach, as some situations would render the
>  attestation unavailable, such as the instance where a VM is running in a
>  cloud, and a guest owner would like to perform attestation via QMP (a 
>  likely
>  scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
>  QMP
>  commands, as this could be an issue.
> >>>
> >>> As a general point, QMP is a low level QEMU implementation detail,
> >>> which is generally expected to be consumed exclusively on the host
> >>> by a privileged mgmt layer, which will in turn expose its own higher
> >>> level APIs to users or other apps. I would not expect to see QMP
> >>> exposed to anything outside of the privileged host layer.
> >>>
> >>> We also use the QAPI protocol for QEMU guest agent communication,
> >>> however, that is a distinct service from QMP on the host. It shares
> >>> most infra with QMP but has a completely different command set. On the
> >>> host it is not consumed inside QEMU, but instead consumed by a
> >>> mgmt app like libvirt. 
> >>>
>  So I ask, does anyone involved in QEMU's SEV implementation have any 
>  input
>  on a quality way to perform guest attestation? If so, I'd be interested.
> >>>
> >>> I think what's missing is some clearer illustrations of how this
> >>> feature is expected to be consumed in some real world application
> >>> and the use cases we're trying to solve.
> >>>
> >>> I'd like to understand how it should fit in with common libvirt
> >>> applications across the different virtualization management
> >>> scenarios - eg virsh (command line),  virt-manger (local desktop
> >>> GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> >>> And of course any non-traditional virt use cases that might be
> >>> relevant such as Kata.
> >>
> >> That's still not that clear; I know Alice and Sergio have some ideas
> >> (cc'd).
> >> There's also some standardisation efforts (e.g. 
> >> https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> >> and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> >> ) - that I can't claim to fully understand.
> >> However, there are some themes that are emerging:
> >>
> >>   a) One use is to only allow a VM to access some private data once we
> >> prove it's the VM we expect running in a secure/confidential system
> >>   b) (a) normally involves requesting some proof from the VM and then
> >> providing it some confidential data/a key if it's OK
> > 
> > I guess I'm wondering what the threat we're protecting against is,
> > and / or which pieces of the stack we can trust ?
> > 
> > eg, if the host has 2 VMs running, we verify the 1st and provide
> > its confidental data back to the host, what stops the host giving
> > that dat to the 2nd non-verified VM ? 
> 
> The host can't read the injected secret: It is encrypted with a key that
> is available only to the PSP.  The PSP receives it and writes it in a
> guest-encrypted memory (which the host also cannot read; for the guest
> it's a simple memory access with C-bit=1).  So it's a per-vm-invocation
> secret.

Is there some way the PSP verifies which VM is supposed to receive
the injected data. ie the host can't read it, but it can tell the
PSP to inject it to VM B instead of VM A.

> > Presumably the data has to be encrypted with a key that is uniquely
> > tied to this specific boot attempt of the verified VM, and not
> > accessible to any other VM, or to future boots of this VM ?
> 
> Yes, launch blob, which (if I recall correctly) the Guest Owner should
> generate and give to the Cloud Provider so it can start a VM with it
> (this is one of the options on the sev-guest object).

Does something stop the host from booting a 2nd VM on the side with
the same launch blob, and thus be able to also tell the PSP to inject
the secret data into this 2nd VM later too ?

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/p

Re: [RFC PATCH 0/2] QEMU/openbios: PPC Software TLB support in the G4 family

2021-11-25 Thread BALATON Zoltan

On Thu, 25 Nov 2021, Cédric Le Goater wrote:

On 11/25/21 01:45, BALATON Zoltan wrote:

On Wed, 24 Nov 2021, Fabiano Rosas wrote:

Fabiano Rosas  writes:


Hi all,

We have this bug in QEMU which indicates that we haven't been able to
run openbios on a 7450 cpu for quite a long time:

https://gitlab.com/qemu-project/qemu/-/issues/86

OK:
  $ ./qemu-system-ppc -serial mon:stdio -nographic -cpu 7410

 >> =
 >> OpenBIOS 1.1 [Nov 1 2021 20:36]
  ...

NOK:
  $ ./qemu-system-ppc -serial mon:stdio -nographic -cpu 7450 -d int


This CPU appears in PowerMac G4 so maybe better use -machine mac99,via=pmu 
with it as it's strange to put it in a g3beige but that may not matter for 
reproducing the problem.


As for guests, those running on the said PowerMac G4 should have support 
for these CPUs so maybe you can try some Mac OS X versions (or maybe 
MorphOS but that is not the best for debugging as there's no source 
available nor any help from its owners but just to see if it boots it may 
be sufficient, it should work on real PowerMac G4). According to 
 this CPU was used 
in  
and it runs up to Mac OS 10.4.11. (Although OpenBIOS sets the device tree 
according to a PowerMac3,1 so not sure it's entirely correct for the 
PowerMac3,5 that has a 7450 CPU and if it matters for Mac OS X.)


I asked about this before but got no reply back then:
https://lists.nongnu.org/archive/html/qemu-ppc/2020-03/msg00292.html

This was because pegasos2 should have 7447 but it did not work so currently 
I've set it to 7400 which also works. The original board firmware had some 
problem detecting it but I think that only results in wrong CPU speed shown 
which is only a cosmetic problem, otherwise it seems to work. Since 
pegasos2 does not use OpenBIOS but either VOF or the board's original 
firmware it may be an alternative way to test at least 7447 which the 
firmware and guests running on that board should work with. At least Debian 
8.11 powerpc version had support for pegasos2 and should boot, I'm not sure 
newer versions still work. More info on pegasos2 can be found at:

http://zero.eik.bme.hu/~balaton/qemu/amiga/#morphos and
https://osdn.net/projects/qmiga/wiki/SubprojectPegasos2


It would be nice to add some documentation for these machines under:

 https://qemu.readthedocs.io/en/latest/system/target-ppc.html


Yes, I thought about that but haven't got to it yet. I'm also not sure 
what should I put in that doc so if you have time feel free to copy from 
the above URLs what you think is better to have in the docs. Otherwise 
I'll do it sometimes in the future.


Regards,
BALATON Zoltan


Thanks

C.



I don't remember what problem I had with 7447 but if it does not work with 
pegasos2 then maybe there's some other problem with it too. I think it was 
maybe related to TLBs but I don't know and had no time to try again so I 
could be entirely wrong about this.


Regards,
BALATON Zoltan


  Raise exception at fff08cc4 => 004e (00)
  QEMU: Terminated

The actual issue is straightforward. There is a non-architected
feature that QEMU has enabled by default that openbios doesn't know
about. From the user manual:

"The MPC7540 has a set of implementation-specific registers,
exceptions, and instructions that facilitate very efficient software
searching of the page tables in memory for when software table
searching is enabled (HID0[STEN] = 1). This section describes those
resources and provides three example code sequences that can be used
in a MPC7540 system for an efficient search of the translation tables
in software. These three code sequences can be used as handlers for
the three exceptions requiring access to the PTEs in the page tables
in memory in this case-instruction TLB miss, data TLB miss on load,
and data TLB miss on store exceptions."

The current state:

1) QEMU does not check HID0[STEN] and makes the feature always enabled
by setting these cpus with the POWERPC_MMU_SOFT_74xx MMU model,
instead of the generic POWERPC_MMU_32B.

2) openbios does not recognize the PVRs for those cpus and also does
not have any handlers for the software TLB exceptions (vectors 0x1000,
0x1100, 0x1200).

Some assumptions (correct me if I'm wrong please):

- openbios is the only firmware we use for the following cpus: 7441,
7445, 7450, 7451, 7455, 7457, 7447, 7447a, 7448.
- without openbios, we cannot have a guest running on these cpus.

So to bring 7450 back to life we would need to either:

a) find another firmware/guest OS code that supports the feature;

b) implement the switching of the feature in QEMU and have the guest
code enable it only when supported. That would take some fiddling with
the MMU code to: merge POWERPC_MMU_SOFT_74xx into POWERPC_MMU_32B,
check the HID0[STEN] bit, figure out how to switch from HW TLB miss to
SW TLB miss on demand, block a

Re: [PATCH v3 1/3] target/ppc: Fixed call to deferred exception

2021-11-25 Thread BALATON Zoltan

On Thu, 25 Nov 2021, David Gibson wrote:

On Thu, Nov 25, 2021 at 01:49:46AM +0100, BALATON Zoltan wrote:

On Wed, 24 Nov 2021, Lucas Mateus Castro (alqotel) wrote:

mtfsf, mtfsfi and mtfsb1 instructions call helper_float_check_status
after updating the value of FPSCR, but helper_float_check_status
checks fp_status and fp_status isn't updated based on FPSCR and
since the value of fp_status is reset earlier in the instruction,
it's always 0.

Because of this helper_float_check_status would change the FI bit to 0
as this bit checks if the last operation was inexact and
float_flag_inexact is always 0.

These instructions also don't throw exceptions correctly since
helper_float_check_status throw exceptions based on fp_status.

This commit created a new helper, helper_fpscr_check_status that checks
FPSCR value instead of fp_status and checks for a larger variety of
exceptions than do_float_check_status.

Since fp_status isn't used, gen_reset_fpstatus() was removed.

The hardware used to compare QEMU's behavior to was a Power9.

Signed-off-by: Lucas Mateus Castro (alqotel) 
---
target/ppc/fpu_helper.c| 48 ++
target/ppc/helper.h|  1 +
target/ppc/translate/fp-impl.c.inc |  9 ++
3 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index c4896cecc8..bb72715827 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -414,6 +414,54 @@ void helper_store_fpscr(CPUPPCState *env, uint64_t val, 
uint32_t nibbles)
ppc_store_fpscr(env, val);
}

+void helper_fpscr_check_status(CPUPPCState *env)
+{
+CPUState *cs = env_cpu(env);
+target_ulong fpscr = env->fpscr;
+int error = 0;
+
+if ((fpscr & FP_OX) && (fpscr & FP_OE)) {
+error = POWERPC_EXCP_FP_OX;
+} else if ((fpscr & FP_UX) && (fpscr & FP_UE)) {
+error = POWERPC_EXCP_FP_UX;
+} else if ((fpscr & FP_XX) && (fpscr & FP_XE)) {
+error = POWERPC_EXCP_FP_XX;
+} else if ((fpscr & FP_ZX) && (fpscr & FP_ZE)) {


I wonder if these tests could be simplified by combining the masks if you
want to test for both bits set so e.g. fpscr & (FP_ZX | FP_ZE) should be the
same, shouldn't it?


No, it's not.  In fact your version is equivalent as a boolean to
((fpscr & FP_ZX) || (fpscr & FP_ZE))


Indeed, it was too late when I wrote. I was really thinking
(fprscr & (FP_ZX | FP_ZE)) == (FP_ZX | FP_ZE) but that's not simpler so 
that answers my question.


Regards,
BALATON Zoltan



Re: [PATCH 1/2] block-backend: Retain permissions after migration

2021-11-25 Thread Philippe Mathieu-Daudé
On 11/25/21 14:53, Hanna Reitz wrote:
> After migration, the permissions the guest device wants to impose on its
> BlockBackend are stored in blk->perm and blk->shared_perm.  In
> blk_root_activate(), we take our permissions, but keep all shared
> permissions open by calling `blk_set_perm(blk->perm, BLK_PERM_ALL)`.
> 
> Only afterwards (immediately or later, depending on the runstate) do we
> restrict the shared permissions by calling
> `blk_set_perm(blk->perm, blk->shared_perm)`.  Unfortunately, our first
> call with shared_perm=BLK_PERM_ALL has overwritten blk->shared_perm to
> be BLK_PERM_ALL, so this is a no-op and the set of shared permissions is
> not restricted.
> 
> Fix this bug by saving the set of shared permissions before invoking
> blk_set_perm() with BLK_PERM_ALL and restoring it afterwards.
> 
> Fixes: 5f7772c4d0cf32f4e779fcd5a69ae4dae24aeebf
>("block-backend: Defer shared_perm tightening migration
>completion")
> Reported-by: Peng Liang 
> Signed-off-by: Hanna Reitz 
> ---
>  block/block-backend.c | 11 +++
>  1 file changed, 11 insertions(+)

Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v2 00/15] hw/nvme: SR-IOV with Virtualization Enhancements

2021-11-25 Thread Łukasz Gieryk
On Wed, Nov 24, 2021 at 09:03:06AM +0100, Klaus Jensen wrote:
> Hi Lukasz,
> 
> I've been through this. I have a couple of review comments, but overall
> looks good for inclusion in nvme-next. Would be nice to get this in
> early in the cycle so it can mature there for 7.0.

We (I’m speaking on behalf of the other Lukasz) are really happy to
read that. We will do our best to make it happen.

> 
> I'd like that we mark this support experimental, so we can easily do
> some changes to how parameters work since I'm not sure we completely
> agree on that yet.
> 
> By the way, in the future, please add me and Keith as CCs on the entire
> series so we get CC'ed on replies to the cover-letter ;)
> 

> > List of known gaps and nice-to-haves:
> > 
> > 1) Interaction of secondary controllers with namespaces is not 100%
> > following the spec
> > 
> > The limitation: VF has to be visible on the PCI bus first, and only then
> > such VF can have a namespace attached.
> > 
> 
> Looking at the spec I'm not even sure what the expected behavior is
> supposed to be, can you elaborate? I rebased this on latest, and with
> Hannes changes, shared namespaces will be attached by default, which
> seems to be reasonable.

An example flow:

# Release flexible resources from PF (assuming it’s /dev/nvme0)
nvme virt-mgmt -c 0 -r 0 -n 0 -a 1 /dev/nvme0
nvme virt-mgmt -c 0 -r 1 -n 0 -a 1 /dev/nvme0
echo 1 > /sys/class/nvme/nvme0/reset_controller
# Bind sane minimums to VF1 (cntlid=1) and set it online
nvme virt-mgmt -c 1 -r 0 -n 5 -a 8 /dev/nvme0
nvme virt-mgmt -c 1 -r 1 -n 5 -a 8 /dev/nvme0
nvme virt-mgmt -c 1 -a 9 /dev/nvme0
# Enable 2 VFs
echo 2 > /sys/bus/pci/devices//sriov_numvfs
# PF, VF1 and VF2 are visible on PCI
lspci | grep Non-Volatile
# The NVMe driver is bound to PF and VF1 (the only online VF)
nvme list -v
# VFs shall eventually not support Ns Management/Attachment commands,
# and namespaces should be attached to VFs (i.e., their secondary
# controllers) through the PF.
# A namespace can be attached to VF1, VF2
nvme attach-ns /dev/nvme0 -c 1 -n X
nvme attach-ns /dev/nvme0 -c 2 -n X
# According to the spec this should also succeed, but today it won’t
nvme attach-ns /dev/nvme0 -c 3 -n X

VF3’s NvmeCtrl object is not yet allocated, so today there’s nothing
for nvme_subsys_ctrl() to return for cntlid=3, besides NULL (the
current behavior) or SUBSYS_SLOT_RSVD.

Relevant use cases:
 - admin can configure disabled VFs,
 - information about attached ns persists when VFs are disabled,
are not that critical, but of course it’s a discrepancy from what a
real device can handle.

In my opinion, to handle the cases correctly, information about attached
namespaces could be moved to subsystem. Could you share your thoughts
whether such approach would make sense?




Re: [RFC PATCH 0/2] QEMU/openbios: PPC Software TLB support in the G4 family

2021-11-25 Thread Cédric Le Goater

 https://qemu.readthedocs.io/en/latest/system/target-ppc.html


Yes, I thought about that but haven't got to it yet. I'm also not sure what 
should I put in that doc so if you have time feel free to copy from the above 
URLs what you think is better to have in the docs. Otherwise I'll do it 
sometimes in the future.


ok. We don't have to copy all of the contents. We can reference
external URLs for more complete information.

Thanks,

C.



Re: [PATCH v5 07/18] target/riscv: setup everything so that riscv128-softmmu compiles

2021-11-25 Thread Frédéric Pétrot

On 25/11/2021 12:47, Alistair Francis wrote:

On Wed, Nov 24, 2021 at 5:33 PM Philippe Mathieu-Daudé  wrote:


Hi Frédéric,

On 11/24/21 07:55, Frédéric Pétrot wrote:

On 24/11/2021 07:12, Alistair Francis wrote:

On Sat, Nov 13, 2021 at 1:16 AM Frédéric Pétrot
 wrote:


This patch is kind of a mess because several files have to be slightly
modified to allow for a new target. In the current status, we have done
our best to have RV64 and RV128 under the same RV64 umbrella, but there
is still work to do to have a single executable for both.
In particular, we have no atomic accesses for aligned 128-bit addresses.

Once this patch is applied, adding riscv128-softmmu to --target-list produces
a (not so useful yet) executable.


I can't remember if we discussed this before, but do we need the
riscv128-softmmu executable? Can we instead just use a riscv64-softmmu
executable?


   Hello Alistair,
   Richard was also advocating for a single executable, but pointed out that
   we need to disable mttcg because there is a need for specific tcg
support for
   128-bit aligned atomics.
   Given my understanding of that part of QEMU, I chose the easy way to
disable
   it once and for all at compile time until we have that.



In rv128_base_cpu_init():

   if (qemu_tcg_mttcg_enabled()) {
   /* Missing 128-bit aligned atomics */
   error_report("128-bit RISC-V currently does not work"
" with Multi Threaded TCG. Please use:"
" -accel tcg,thread=single");
   exit(EXIT_FAILURE);
   }


That seems like a good option! I think we could add this to the CPU
realise function.

The problem with a riscv128-softmmu executable is that it's hard to get
rid of in the future. We are very slowly moving towards a single
executable and adding a new one means we are stuck with it for a
while.

Alistair


  Ooups, I replied yesterday to Philippe that I would work that out, but I
  missed the reply all button, sorry.

  Frédéric




Regards,

Phil.


--
+---+
| Frédéric Pétrot, Pr. Grenoble INP-Ensimag/TIMA,   Ensimag deputy director |
| Mob/Pho: +33 6 74 57 99 65/+33 4 76 57 48 70  Ad augusta  per angusta |
| http://tima.univ-grenoble-alpes.fr frederic.pet...@univ-grenoble-alpes.fr |
+---+



[Bug 1603636] Re: Guest has not initialized the display yet on ubuntu 16.10 PPC

2021-11-25 Thread Paul White
** Changed in: ubuntu
   Status: New => Invalid

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1603636

Title:
  Guest has not initialized the display yet on ubuntu 16.10 PPC

Status in QEMU:
  Fix Released
Status in Ubuntu:
  Invalid

Bug description:
  Hi
  tested with all kind of configure, with all kind of machine types but i have 
the same issue ... 
  on lastest quemo 2.6 "Guest has not initialized the display yet"
  note with lastest git repository the situation become worst because on 
i386-softmmu i have the message but qemu exit alone because looklike there is 
not a bios 

  this is gdb of i386-softmmu

  (gdb) run
  Starting program: /home/amigaone/src/qemu/i386-softmmu/qemu-system-i386 
  [Thread debugging using libthread_db enabled]
  Using host libthread_db library "/lib/powerpc-linux-gnu/libthread_db.so.1".
  [New Thread 0xf7f78b70 (LWP 25074)]
  [New Thread 0xf770bb70 (LWP 25075)]
  [New Thread 0xf6dfdb70 (LWP 25076)]
  [New Thread 0xf65fdb70 (LWP 25077)]
  [New Thread 0xf3337b70 (LWP 25078)]
  [New Thread 0xe4146b70 (LWP 25087)]
  qemu-system-i386: Trying to execute code outside RAM or ROM at 0x000a
  This usually means one of the following happened:

  (1) You told QEMU to execute a kernel for the wrong machine type, and it 
crashed on startup (eg trying to run a raspberry pi kernel on a versatilepb 
QEMU machine)
  (2) You didn't give QEMU a kernel or BIOS filename at all, and QEMU executed 
a ROM full of no-op instructions until it fell off the end
  (3) Your guest kernel has a bug and crashed by jumping off into nowhere

  This is almost always one of the first two, so check your command line and 
that you are using the right type of kernel for this machine.
  If you think option (3) is likely then you can try debugging your guest with 
the -d debug options; in particular -d guest_errors will cause the log to 
include a dump of the guest register state at this point.

  Execution cannot continue; stopping here.

  [Thread 0xe4146b70 (LWP 25087) exited]
  [Thread 0xf65fdb70 (LWP 25077) exited]
  [Thread 0xf6dfdb70 (LWP 25076) exited]
  [Thread 0xf770bb70 (LWP 25075) exited]
  [Thread 0xf7f78b70 (LWP 25074) exited]
  [Thread 0xf7f7c000 (LWP 25070) exited]
  [Inferior 1 (process 25070) exited with code 01]

  
  this is my ldd 
  ldd ./qemu-system-i386 
linux-vdso32.so.1 =>  (0x0010)
libvirglrenderer.so.0 => /usr/local/lib/libvirglrenderer.so.0 
(0x0ff8a000)
libepoxy.so.0 => /usr/lib/powerpc-linux-gnu/libepoxy.so.0 (0x0fe86000)
libgbm.so.1 => /usr/local/lib/libgbm.so.1 (0x0fe55000)
libX11.so.6 => /usr/lib/powerpc-linux-gnu/libX11.so.6 (0x0fcf2000)
libz.so.1 => /lib/powerpc-linux-gnu/libz.so.1 (0x0fcb1000)
libcurl-gnutls.so.4 => /usr/lib/powerpc-linux-gnu/libcurl-gnutls.so.4 
(0x0fc1)
libssh2.so.1 => /usr/lib/powerpc-linux-gnu/libssh2.so.1 (0x0fbbf000)
libbz2.so.1.0 => /lib/powerpc-linux-gnu/libbz2.so.1.0 (0x0fb7e000)
libpixman-1.so.0 => /usr/lib/powerpc-linux-gnu/libpixman-1.so.0 
(0x0fadd000)
libutil.so.1 => /lib/powerpc-linux-gnu/libutil.so.1 (0x0faac000)
libnuma.so.1 => /usr/lib/powerpc-linux-gnu/libnuma.so.1 (0x0fa79000)
libncurses.so.5 => /lib/powerpc-linux-gnu/libncurses.so.5 (0x0fa28000)
libtinfo.so.5 => /lib/powerpc-linux-gnu/libtinfo.so.5 (0x0f9d7000)
libuuid.so.1 => /lib/powerpc-linux-gnu/libuuid.so.1 (0x0f9a6000)
libpng16.so.16 => /usr/lib/powerpc-linux-gnu/libpng16.so.16 (0x0f945000)
libjpeg.so.8 => /usr/lib/powerpc-linux-gnu/libjpeg.so.8 (0x0f8d4000)
libSDL2-2.0.so.0 => /usr/local/lib/libSDL2-2.0.so.0 (0x0f77d000)
libnettle.so.6 => /usr/lib/powerpc-linux-gnu/libnettle.so.6 (0x0f71c000)
libgnutls.so.30 => /usr/lib/powerpc-linux-gnu/libgnutls.so.30 
(0x0f5ca000)
libgtk-x11-2.0.so.0 => /usr/lib/powerpc-linux-gnu/libgtk-x11-2.0.so.0 
(0x0f0e6000)
libgdk-x11-2.0.so.0 => /usr/lib/powerpc-linux-gnu/libgdk-x11-2.0.so.0 
(0x0f005000)
libcairo.so.2 => /usr/lib/powerpc-linux-gnu/libcairo.so.2 (0x0eec3000)
libgdk_pixbuf-2.0.so.0 => 
/usr/lib/powerpc-linux-gnu/libgdk_pixbuf-2.0.so.0 (0x0ee72000)
libgobject-2.0.so.0 => /usr/lib/powerpc-linux-gnu/libgobject-2.0.so.0 
(0x0edf1000)
libglib-2.0.so.0 => /lib/powerpc-linux-gnu/libglib-2.0.so.0 (0x0eca)
libsnappy.so.1 => /usr/lib/powerpc-linux-gnu/libsnappy.so.1 (0x0ec6f000)
libusb-1.0.so.0 => /lib/powerpc-linux-gnu/libusb-1.0.so.0 (0x0ec2e000)
librt.so.1 => /lib/powerpc-linux-gnu/librt.so.1 (0x0ebfd000)
libm.so.6 => /lib/powerpc-linux-gnu/libm.so.6 (0x0eb0c000)
libgcc_s.so.1 => /lib/powerpc-linux-gnu/libgcc_s.so.1 (0x0eacb000)
libpthread.so.0 => /lib/powerpc-linux-gnu/libpthread.so.0 (0x0ea88000)
libc.so.6 => /lib/powerpc-linux-gnu/libc.so.6 (0x0e8d4000)
libdrm.

Re: SEV guest attestation

2021-11-25 Thread Dr. David Alan Gilbert
* Daniel P. Berrangé (berra...@redhat.com) wrote:
> On Thu, Nov 25, 2021 at 08:14:28AM +0100, Sergio Lopez wrote:
> > For SEV-SNP, this is pretty much the end of the story, because the
> > attestation exchange is driven by an agent inside the guest. Well,
> > there's also the need to have in the VM a well-known vNIC bridged to a
> > network that's routed to the Attestation Server, that everyone seems
> > to consider a given, but to me, from a CSP perspective, looks like
> > quite a headache. In fact, I'd go as far as to suggest this
> > communication should happen through an alternative channel, such as
> > vsock, having a proxy on the Host, but I guess that depends on the CSP
> > infrastructure.
> 
> Allowing network connections from inside the VM, to any kind
> of host side mgmt LAN services is a big no for some cloud hosts.
> 
> They usually desire for any guest network connectivity to be
> associated with a VLAN/network segment that is strictly isolated
> from any host mgmt LAN.
> 
> OpenStack provides a virtual CCDROM for injecting cloud-init
> metadata as an alternative to the network based metadata REST
> service, since they latter often isn't deployed.
> 
> Similarly for virtual filesystems, we've designed virtiofs,
> rather than relying on a 2nd NIC combined with NFS.
> 
> We cannot assume availability of a real network device for the
> attestation. If one does exist fine, but there needs to be an
> alternative option that can be used.
> 
> 
> On a slightly different topic - if the attestation is driven
> from an agent inside the guest, this seems to imply we let the
> guest vCPUs start beforre attestation is done. Contrary to
> the SEV/SEV-ES where we seem to be wanting vCPUs to remain
> in the stopped state until attestation is complete & secrets
> provided.

That's right; SEV/SEV-ES is the odd case here.

> If the vCPUs are started, is there some mechanism
> to restrict what can be done  before attestation is complete?

Just the fact you haven't provided it the keys to decrypt its disk to
do anything interesting; there's the potential to add extra if you
wanted (e.g. 802.1X network auth).

Dave

> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




[PATCH v8 00/10] PMU-EBB support for PPC64 TCG

2021-11-25 Thread Daniel Henrique Barboza
Hi,

In this new version considerable changes were made based on David's
feedback of the previous version. All the counter frozen logic was
moved from the body of helper_store_mmcr0 to pmc_get_event() via a
new PMUEventType called PMU_EVENT_INACTIVE. The function
pmu_update_cycles() is now called in multiple instances to update
the counter values before a change in the PMU state might be made.

All this changes culminated into the removal of the 'cycle session'
idea that was present in the previous version. The logic is now more
straightforward for all the other aspects of the PMU.

Changes from v7:
- patch 1:
  * added David's R-b
  * added PMU_EVENT_INACTIVE event
- patch 2:
  * 'cycle count session' concept was removed
  * pmc_update_cycles() is now a helper that can be called at
all times to update the PMCs using the current MMCR0/MMCR1 states
  * logic using curr_FC and old_FC inside helper_store_mmcr0
was removed
  * renamed getPMUEventType() to pmc_get_event()
- patch 3 (former patch 6):
  * moved up and now also handles PMC updates on PMC read
- patch 4 (new):
  * update counters on MMCR1 write
- patch 5 (former 3):
  * a new helper 'pmc_update_overflow_timer' was added to handle
changes on a single overflow counter
  * writes on a PMC, instead of trigger an update on all overflow
timers, will update just its own timer
- former patch 7: merged into patch 5
- v7 link: https://lists.gnu.org/archive/html/qemu-devel/2021-11/msg04185.html


Daniel Henrique Barboza (9):
  target/ppc: introduce PMUEventType and PMU overflow timers
  target/ppc: PMU basic cycle count for pseries TCG
  target/ppc: PMU: update counters on PMCs r/w
  target/ppc: PMU: update counters on MMCR1 write
  target/ppc: enable PMU counter overflow with cycle events
  target/ppc: enable PMU instruction count
  target/ppc/power8-pmu.c: add PM_RUN_INST_CMPL (0xFA) event
  PPC64/TCG: Implement 'rfebb' instruction
  target/ppc/excp_helper.c: EBB handling adjustments

Gustavo Romero (1):
  target/ppc: PMU Event-Based exception support

 hw/ppc/spapr_cpu_core.c|   1 +
 target/ppc/cpu.h   |  61 +++-
 target/ppc/cpu_init.c  |  46 +++-
 target/ppc/excp_helper.c   |  93 +++
 target/ppc/helper.h|   6 +
 target/ppc/helper_regs.c   |   4 +
 target/ppc/insn32.decode   |   5 +
 target/ppc/meson.build |   1 +
 target/ppc/power8-pmu-regs.c.inc   |  69 -
 target/ppc/power8-pmu.c| 368 +
 target/ppc/power8-pmu.h|  25 ++
 target/ppc/spr_tcg.h   |   5 +
 target/ppc/translate.c |  60 
 target/ppc/translate/branch-impl.c.inc |  33 +++
 14 files changed, 762 insertions(+), 15 deletions(-)
 create mode 100644 target/ppc/power8-pmu.c
 create mode 100644 target/ppc/power8-pmu.h
 create mode 100644 target/ppc/translate/branch-impl.c.inc

-- 
2.31.1




[PATCH v8 01/10] target/ppc: introduce PMUEventType and PMU overflow timers

2021-11-25 Thread Daniel Henrique Barboza
This patch starts an IBM Power8+ compatible PMU implementation by adding
the representation of PMU events that we are going to sample,
PMUEventType. This enum represents a Perf event that is being sampled by
a specific counter 'sprn'. Events that aren't available (i.e. no event
was set in MMCR1) will be of type 'PMU_EVENT_INVALID'. Events that are
inactive due to frozen counter bits state are of type
'PMU_EVENT_INACTIVE'. Other types added in this patch are
PMU_EVENT_CYCLES and PMU_EVENT_INSTRUCTIONS.  More types will be added
later on.

Let's also add the required PMU cycle overflow timers. They will be used
to trigger cycle overflows when cycle events are being sampled. This
timer will call cpu_ppc_pmu_timer_cb(), which in turn calls
fire_PMC_interrupt().  Both functions are stubs that will be implemented
later on when EBB support is added.

Two new helper files are created to host this new logic.
cpu_ppc_pmu_init() will init all overflow timers during CPU init time.

Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr_cpu_core.c |  1 +
 target/ppc/cpu.h| 16 
 target/ppc/cpu_init.c   | 24 ++
 target/ppc/meson.build  |  1 +
 target/ppc/power8-pmu.c | 56 +
 target/ppc/power8-pmu.h | 25 ++
 6 files changed, 123 insertions(+)
 create mode 100644 target/ppc/power8-pmu.c
 create mode 100644 target/ppc/power8-pmu.h

diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 58e7341cb7..a57ba70a87 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -20,6 +20,7 @@
 #include "target/ppc/kvm_ppc.h"
 #include "hw/ppc/ppc.h"
 #include "target/ppc/mmu-hash64.h"
+#include "target/ppc/power8-pmu.h"
 #include "sysemu/numa.h"
 #include "sysemu/reset.h"
 #include "sysemu/hw_accel.h"
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index e946da5f3a..2ad47b06d0 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -296,6 +296,16 @@ typedef struct ppc_v3_pate_t {
 uint64_t dw1;
 } ppc_v3_pate_t;
 
+/* PMU related structs and defines */
+#define PMU_COUNTERS_NUM 6
+#define PMU_TIMERS_NUM   (PMU_COUNTERS_NUM - 1) /* PMC5 doesn't count cycles */
+typedef enum {
+PMU_EVENT_INVALID = 0,
+PMU_EVENT_INACTIVE,
+PMU_EVENT_CYCLES,
+PMU_EVENT_INSTRUCTIONS,
+} PMUEventType;
+
 /*/
 /* Machine state register bits definition*/
 #define MSR_SF   63 /* Sixty-four-bit modehflags */
@@ -1191,6 +1201,12 @@ struct CPUPPCState {
 uint32_t tm_vscr;
 uint64_t tm_dscr;
 uint64_t tm_tar;
+
+/*
+ * Timers used to fire performance monitor alerts
+ * when counting cycles.
+ */
+QEMUTimer *pmu_cyc_overflow_timers[PMU_TIMERS_NUM];
 };
 
 #define SET_FIT_PERIOD(a_, b_, c_, d_)  \
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 6695985e9b..9610e65c76 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -45,6 +45,7 @@
 #include "helper_regs.h"
 #include "internal.h"
 #include "spr_tcg.h"
+#include "power8-pmu.h"
 
 /* #define PPC_DEBUG_SPR */
 /* #define USE_APPLE_GDB */
@@ -7377,6 +7378,20 @@ static void register_power9_mmu_sprs(CPUPPCState *env)
 #endif
 }
 
+/*
+ * Initialize PMU counter overflow timers for Power8 and
+ * newer Power chips when using TCG.
+ */
+static void init_tcg_pmu_power8(CPUPPCState *env)
+{
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+/* Init PMU overflow timers */
+if (!kvm_enabled()) {
+cpu_ppc_pmu_init(env);
+}
+#endif
+}
+
 static void init_proc_book3s_common(CPUPPCState *env)
 {
 register_ne_601_sprs(env);
@@ -7694,6 +7709,9 @@ static void init_proc_POWER8(CPUPPCState *env)
 register_sdr1_sprs(env);
 register_book3s_207_dbg_sprs(env);
 
+/* Common TCG PMU */
+init_tcg_pmu_power8(env);
+
 /* POWER8 Specific Registers */
 register_book3s_ids_sprs(env);
 register_rmor_sprs(env);
@@ -7888,6 +7906,9 @@ static void init_proc_POWER9(CPUPPCState *env)
 init_proc_book3s_common(env);
 register_book3s_207_dbg_sprs(env);
 
+/* Common TCG PMU */
+init_tcg_pmu_power8(env);
+
 /* POWER8 Specific Registers */
 register_book3s_ids_sprs(env);
 register_amr_sprs(env);
@@ -8104,6 +8125,9 @@ static void init_proc_POWER10(CPUPPCState *env)
 init_proc_book3s_common(env);
 register_book3s_207_dbg_sprs(env);
 
+/* Common TCG PMU */
+init_tcg_pmu_power8(env);
+
 /* POWER8 Specific Registers */
 register_book3s_ids_sprs(env);
 register_amr_sprs(env);
diff --git a/target/ppc/meson.build b/target/ppc/meson.build
index b85f295703..a49a8911e0 100644
--- a/target/ppc/meson.build
+++ b/target/ppc/meson.build
@@ -51,6 +51,7 @@ ppc_softmmu_ss.add(when: 'TARGET_PPC64', if_true: files(
   'mmu-book3s-v3.c',
   'mmu-hash64.c',
   'mmu-radix64.c',
+  'power8-pmu.c',
 ))

[PATCH v8 02/10] target/ppc: PMU basic cycle count for pseries TCG

2021-11-25 Thread Daniel Henrique Barboza
This patch adds the barebones of the PMU logic by enabling cycle
counting. The overall logic goes as follows:

- MMCR0 reg initial value is set to 0x80000000 (MMCR0_FC set) to avoid
having to spin the PMU right at system init;

- to retrieve the events that are being profiled, pmc_get_event() will
check the current MMCR0 and MMCR1 value and return the appropriate
PMUEventType. For PMCs 1-4, event 0x2 is the implementation dependent
value of PMU_EVENT_INSTRUCTIONS and event 0x1E is the implementation
dependent value of PMU_EVENT_CYCLES. These events are supported by IBM
Power chips since Power8, at least, and the Linux Perf driver makes use
of these events until kernel v5.15. For PMC1, event 0xF0 is the
architected PowerISA event for cycles. Event 0xFE is the architected
PowerISA event for instructions;

- if the counter is frozen, either via the global MMCR0_FC bit or its
individual frozen counter bit, PMU_EVENT_INACTIVE is returned;

- pmu_update_cycles() will go through each counter and update the
values of all PMCs that are counting cycles. This function will be
called every time a MMCR0 update is done to keep counters values
up to date. Upcoming patches will use this function to allow the
counters to be properly updated during read/write of the PMCs
and MMCR1 writes.

Given that the base CPU frequency is fixed at 1GHz for both the powernv
and pseries machines, cycle calculation assumes that 1 nanosecond equals
1 CPU cycle. The cycle value is then calculated by adding the elapsed time,
in nanoseconds, since the last cycle update done via pmu_update_cycles().

Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h |  20 +
 target/ppc/cpu_init.c|   6 +-
 target/ppc/helper.h  |   1 +
 target/ppc/power8-pmu-regs.c.inc |  23 +-
 target/ppc/power8-pmu.c  | 122 +++
 target/ppc/spr_tcg.h |   1 +
 6 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 2ad47b06d0..9c732953f0 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -361,6 +361,9 @@ typedef enum {
 #define MMCR0_FCECE  PPC_BIT(38) /* FC on Enabled Cond or Event */
 #define MMCR0_PMCC0  PPC_BIT(44) /* PMC Control bit 0 */
 #define MMCR0_PMCC1  PPC_BIT(45) /* PMC Control bit 1 */
+#define MMCR0_PMCC   PPC_BITMASK(44, 45) /* PMC Control */
+#define MMCR0_FC14   PPC_BIT(58) /* PMC Freeze Counters 1-4 bit */
+#define MMCR0_FC56   PPC_BIT(59) /* PMC Freeze Counters 5-6 bit */
 /* MMCR0 userspace r/w mask */
 #define MMCR0_UREG_MASK (MMCR0_FC | MMCR0_PMAO | MMCR0_PMAE)
 /* MMCR2 userspace r/w mask */
@@ -373,6 +376,17 @@ typedef enum {
 #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
  MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
 
+#define MMCR1_EVT_SIZE 8
+/* extract64() does a right shift before extracting */
+#define MMCR1_PMC1SEL_START 32
+#define MMCR1_PMC1EVT_EXTR (64 - MMCR1_PMC1SEL_START - MMCR1_EVT_SIZE)
+#define MMCR1_PMC2SEL_START 40
+#define MMCR1_PMC2EVT_EXTR (64 - MMCR1_PMC2SEL_START - MMCR1_EVT_SIZE)
+#define MMCR1_PMC3SEL_START 48
+#define MMCR1_PMC3EVT_EXTR (64 - MMCR1_PMC3SEL_START - MMCR1_EVT_SIZE)
+#define MMCR1_PMC4SEL_START 56
+#define MMCR1_PMC4EVT_EXTR (64 - MMCR1_PMC4SEL_START - MMCR1_EVT_SIZE)
+
 /* LPCR bits */
 #define LPCR_VPM0 PPC_BIT(0)
 #define LPCR_VPM1 PPC_BIT(1)
@@ -1207,6 +1221,12 @@ struct CPUPPCState {
  * when counting cycles.
  */
 QEMUTimer *pmu_cyc_overflow_timers[PMU_TIMERS_NUM];
+
+/*
+ * PMU base time value used by the PMU to calculate
+ * running cycles.
+ */
+uint64_t pmu_base_time;
 };
 
 #define SET_FIT_PERIOD(a_, b_, c_, d_)  \
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 9610e65c76..e0b6fe4057 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6821,8 +6821,8 @@ static void register_book3s_pmu_sup_sprs(CPUPPCState *env)
 {
 spr_register_kvm(env, SPR_POWER_MMCR0, "MMCR0",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- KVM_REG_PPC_MMCR0, 0x);
+ &spr_read_generic, &spr_write_MMCR0,
+ KVM_REG_PPC_MMCR0, 0x8000);
 spr_register_kvm(env, SPR_POWER_MMCR1, "MMCR1",
  SPR_NOACCESS, SPR_NOACCESS,
  &spr_read_generic, &spr_write_generic,
@@ -6870,7 +6870,7 @@ static void register_book3s_pmu_user_sprs(CPUPPCState 
*env)
 spr_register(env, SPR_POWER_UMMCR0, "UMMCR0",
  &spr_read_MMCR0_ureg, &spr_write_MMCR0_ureg,
  &spr_read_ureg, &spr_write_ureg,
- 0x);
+ 0x8000);
 spr_register(env, SPR_POWER_UMMCR1, "UMMCR1",
  &spr_read_ureg, SPR_NOACCESS,
  &spr_read_ureg, &spr_write_ureg,
diff --git a/target/ppc/helper.h b/targ

[PATCH v8 04/10] target/ppc: PMU: update counters on MMCR1 write

2021-11-25 Thread Daniel Henrique Barboza
MMCR1 determines the events to be sampled by the PMU. Updating the
counters at every MMCR1 write ensures that we're not sampling more
or less events by looking only at MMCR0 and the PMCs.

It is worth noticing that both the Book3S PowerPC PMU, and this IBM
Power8+ PMU that we're modeling, also uses MMCRA, MMCR2 and MMCR3 to
control the PMU. These three registers aren't being handled in this
initial implementation, so for now we're controlling all the PMU
aspects using MMCR0, MMCR1 and the PMCs.

Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu_init.c|  2 +-
 target/ppc/helper.h  |  1 +
 target/ppc/power8-pmu-regs.c.inc | 11 +++
 target/ppc/power8-pmu.c  |  7 +++
 target/ppc/spr_tcg.h |  1 +
 5 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index a7f47ec322..2d72dde26d 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6825,7 +6825,7 @@ static void register_book3s_pmu_sup_sprs(CPUPPCState *env)
  KVM_REG_PPC_MMCR0, 0x8000);
 spr_register_kvm(env, SPR_POWER_MMCR1, "MMCR1",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_generic, &spr_write_MMCR1,
  KVM_REG_PPC_MMCR1, 0x);
 spr_register_kvm(env, SPR_POWER_MMCRA, "MMCRA",
  SPR_NOACCESS, SPR_NOACCESS,
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d7567f75b4..94b4690375 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -21,6 +21,7 @@ DEF_HELPER_1(hrfid, void, env)
 DEF_HELPER_2(store_lpcr, void, env, tl)
 DEF_HELPER_2(store_pcr, void, env, tl)
 DEF_HELPER_2(store_mmcr0, void, env, tl)
+DEF_HELPER_2(store_mmcr1, void, env, tl)
 DEF_HELPER_3(store_pmc, void, env, i32, i64)
 DEF_HELPER_2(read_pmc, tl, env, i32)
 #endif
diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
index f0c9cc343b..25b13ad564 100644
--- a/target/ppc/power8-pmu-regs.c.inc
+++ b/target/ppc/power8-pmu-regs.c.inc
@@ -255,6 +255,12 @@ void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn)
 {
 write_MMCR0_common(ctx, cpu_gpr[gprn]);
 }
+
+void spr_write_MMCR1(DisasContext *ctx, int sprn, int gprn)
+{
+gen_icount_io_start(ctx);
+gen_helper_store_mmcr1(cpu_env, cpu_gpr[gprn]);
+}
 #else
 void spr_read_MMCR0_ureg(DisasContext *ctx, int gprn, int sprn)
 {
@@ -301,6 +307,11 @@ void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn)
 spr_write_generic(ctx, sprn, gprn);
 }
 
+void spr_write_MMCR1(DisasContext *ctx, int sprn, int gprn)
+{
+spr_write_generic(ctx, sprn, gprn);
+}
+
 void spr_write_PMC(DisasContext *ctx, int sprn, int gprn)
 {
 spr_write_generic(ctx, sprn, gprn);
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 5f2623aa25..acdaee7459 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -145,6 +145,13 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong 
value)
 }
 }
 
+void helper_store_mmcr1(CPUPPCState *env, uint64_t value)
+{
+pmu_update_cycles(env);
+
+env->spr[SPR_POWER_MMCR1] = value;
+}
+
 target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
 {
 pmu_update_cycles(env);
diff --git a/target/ppc/spr_tcg.h b/target/ppc/spr_tcg.h
index 1e79a0522a..1d6521eedc 100644
--- a/target/ppc/spr_tcg.h
+++ b/target/ppc/spr_tcg.h
@@ -26,6 +26,7 @@ void spr_noaccess(DisasContext *ctx, int gprn, int sprn);
 void spr_read_generic(DisasContext *ctx, int gprn, int sprn);
 void spr_write_generic(DisasContext *ctx, int sprn, int gprn);
 void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_MMCR1(DisasContext *ctx, int sprn, int gprn);
 void spr_write_PMC(DisasContext *ctx, int sprn, int gprn);
 void spr_read_xer(DisasContext *ctx, int gprn, int sprn);
 void spr_write_xer(DisasContext *ctx, int sprn, int gprn);
-- 
2.31.1




[PATCH v8 06/10] target/ppc: enable PMU instruction count

2021-11-25 Thread Daniel Henrique Barboza
The PMU is already counting cycles by calculating time elapsed in
nanoseconds. Counting instructions is a different matter and requires
another approach.

This patch adds the capability of counting completed instructions
(Perf event PM_INST_CMPL) by counting the amount of instructions
translated in each translation block right before exiting it.

A new pmu_count_insns() helper in translation.c was added to do that.
After verifying that the PMU is running (MMCR0_FC bit not set), call
helper_insns_inc(). This new helper from power8-pmu.c will add the
instructions to the relevant counters. It'll also be responsible for
triggering counter negative overflows as it is already being done with
cycles.

Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h |  1 +
 target/ppc/helper.h  |  1 +
 target/ppc/helper_regs.c |  4 +++
 target/ppc/power8-pmu-regs.c.inc |  6 +
 target/ppc/power8-pmu.c  | 38 ++
 target/ppc/translate.c   | 46 
 6 files changed, 96 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 9b41b022e2..38cd2b5c43 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -656,6 +656,7 @@ enum {
 HFLAGS_PR = 14,  /* MSR_PR */
 HFLAGS_PMCC0 = 15,  /* MMCR0 PMCC bit 0 */
 HFLAGS_PMCC1 = 16,  /* MMCR0 PMCC bit 1 */
+HFLAGS_MMCR0FC = 17, /* MMCR0 FC bit */
 HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
 HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 94b4690375..d8a23e054a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -24,6 +24,7 @@ DEF_HELPER_2(store_mmcr0, void, env, tl)
 DEF_HELPER_2(store_mmcr1, void, env, tl)
 DEF_HELPER_3(store_pmc, void, env, i32, i64)
 DEF_HELPER_2(read_pmc, tl, env, i32)
+DEF_HELPER_2(insns_inc, void, env, i32)
 #endif
 DEF_HELPER_1(check_tlb_flush_local, void, env)
 DEF_HELPER_1(check_tlb_flush_global, void, env)
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 99562edd57..875c2fdfc6 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -115,6 +115,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
 if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
 hflags |= 1 << HFLAGS_PMCC1;
 }
+if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
+hflags |= 1 << HFLAGS_MMCR0FC;
+}
+
 
 #ifndef CONFIG_USER_ONLY
 if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
index 25b13ad564..580e4e41b2 100644
--- a/target/ppc/power8-pmu-regs.c.inc
+++ b/target/ppc/power8-pmu-regs.c.inc
@@ -113,6 +113,12 @@ static void write_MMCR0_common(DisasContext *ctx, TCGv val)
  */
 gen_icount_io_start(ctx);
 gen_helper_store_mmcr0(cpu_env, val);
+
+/*
+ * End the translation block because MMCR0 writes can change
+ * ctx->pmu_frozen.
+ */
+ctx->base.is_jmp = DISAS_EXIT_UPDATE;
 }
 
 void spr_write_MMCR0_ureg(DisasContext *ctx, int sprn, int gprn)
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 01e0b9b8fc..59d0def79d 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -112,6 +112,30 @@ static PMUEventType pmc_get_event(CPUPPCState *env, int 
sprn)
 return evt_type;
 }
 
+static bool pmu_increment_insns(CPUPPCState *env, uint32_t num_insns)
+{
+bool overflow_triggered = false;
+int sprn;
+
+/* PMC6 never counts instructions */
+for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
+if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
+continue;
+}
+
+env->spr[sprn] += num_insns;
+
+if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
+pmc_has_overflow_enabled(env, sprn)) {
+
+overflow_triggered = true;
+env->spr[sprn] = PMC_COUNTER_NEGATIVE_VAL;
+}
+}
+
+return overflow_triggered;
+}
+
 static void pmu_update_cycles(CPUPPCState *env)
 {
 uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
@@ -258,6 +282,20 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
 return;
 }
 
+/* This helper assumes that the PMC is running. */
+void helper_insns_inc(CPUPPCState *env, uint32_t num_insns)
+{
+bool overflow_triggered;
+PowerPCCPU *cpu;
+
+overflow_triggered = pmu_increment_insns(env, num_insns);
+
+if (overflow_triggered) {
+cpu = env_archcpu(env);
+fire_PMC_interrupt(cpu);
+}
+}
+
 static void cpu_ppc_pmu_timer_cb(void *opaque)
 {
 PowerPCCPU *cpu = opaque;
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 9960df6e18..ccc83d0603 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -177,6 +177,7 @@ struct DisasContext {
 bool hr;
 bool mmcr0_pmcc0;
 bool mmcr0_pmcc1;
+bool pmu_frozen;
 ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr *

[PATCH v8 03/10] target/ppc: PMU: update counters on PMCs r/w

2021-11-25 Thread Daniel Henrique Barboza
Calling pmu_update_cycles() on every PMC read/write operation ensures
that the values being fetched are up to date with the current PMU state.

In theory we can get away by just trapping PMCs reads, but we're going
to trap PMC writes to deal with counter overflow logic later on.  Let's
put the required wiring for that and make our lives a bit easier in the
next patches.

Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu_init.c| 12 ++--
 target/ppc/helper.h  |  2 ++
 target/ppc/power8-pmu-regs.c.inc | 29 +++--
 target/ppc/power8-pmu.c  | 14 ++
 target/ppc/spr_tcg.h |  2 ++
 5 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index e0b6fe4057..a7f47ec322 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6833,27 +6833,27 @@ static void register_book3s_pmu_sup_sprs(CPUPPCState 
*env)
  KVM_REG_PPC_MMCRA, 0x);
 spr_register_kvm(env, SPR_POWER_PMC1, "PMC1",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_PMC, &spr_write_PMC,
  KVM_REG_PPC_PMC1, 0x);
 spr_register_kvm(env, SPR_POWER_PMC2, "PMC2",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_PMC, &spr_write_PMC,
  KVM_REG_PPC_PMC2, 0x);
 spr_register_kvm(env, SPR_POWER_PMC3, "PMC3",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_PMC, &spr_write_PMC,
  KVM_REG_PPC_PMC3, 0x);
 spr_register_kvm(env, SPR_POWER_PMC4, "PMC4",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_PMC, &spr_write_PMC,
  KVM_REG_PPC_PMC4, 0x);
 spr_register_kvm(env, SPR_POWER_PMC5, "PMC5",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_PMC, &spr_write_PMC,
  KVM_REG_PPC_PMC5, 0x);
 spr_register_kvm(env, SPR_POWER_PMC6, "PMC6",
  SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_PMC, &spr_write_PMC,
  KVM_REG_PPC_PMC6, 0x);
 spr_register_kvm(env, SPR_POWER_SIAR, "SIAR",
  SPR_NOACCESS, SPR_NOACCESS,
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index ea60a7493c..d7567f75b4 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -21,6 +21,8 @@ DEF_HELPER_1(hrfid, void, env)
 DEF_HELPER_2(store_lpcr, void, env, tl)
 DEF_HELPER_2(store_pcr, void, env, tl)
 DEF_HELPER_2(store_mmcr0, void, env, tl)
+DEF_HELPER_3(store_pmc, void, env, i32, i64)
+DEF_HELPER_2(read_pmc, tl, env, i32)
 #endif
 DEF_HELPER_1(check_tlb_flush_local, void, env)
 DEF_HELPER_1(check_tlb_flush_global, void, env)
diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc
index fbb8977641..f0c9cc343b 100644
--- a/target/ppc/power8-pmu-regs.c.inc
+++ b/target/ppc/power8-pmu-regs.c.inc
@@ -181,13 +181,23 @@ void spr_write_MMCR2_ureg(DisasContext *ctx, int sprn, 
int gprn)
 tcg_temp_free(masked_gprn);
 }
 
+void spr_read_PMC(DisasContext *ctx, int gprn, int sprn)
+{
+TCGv_i32 t_sprn = tcg_const_i32(sprn);
+
+gen_icount_io_start(ctx);
+gen_helper_read_pmc(cpu_gpr[gprn], cpu_env, t_sprn);
+
+tcg_temp_free_i32(t_sprn);
+}
+
 void spr_read_PMC14_ureg(DisasContext *ctx, int gprn, int sprn)
 {
 if (!spr_groupA_read_allowed(ctx)) {
 return;
 }
 
-spr_read_ureg(ctx, gprn, sprn);
+spr_read_PMC(ctx, gprn, sprn + 0x10);
 }
 
 void spr_read_PMC56_ureg(DisasContext *ctx, int gprn, int sprn)
@@ -206,13 +216,23 @@ void spr_read_PMC56_ureg(DisasContext *ctx, int gprn, int 
sprn)
 spr_read_PMC14_ureg(ctx, gprn, sprn);
 }
 
+void spr_write_PMC(DisasContext *ctx, int sprn, int gprn)
+{
+TCGv_i32 t_sprn = tcg_const_i32(sprn);
+
+gen_icount_io_start(ctx);
+gen_helper_store_pmc(cpu_env, t_sprn, cpu_gpr[gprn]);
+
+tcg_temp_free_i32(t_sprn);
+}
+
 void spr_write_PMC14_ureg(DisasContext *ctx, int sprn, int gprn)
 {
 if (!spr_groupA_write_allowed(ctx)) {
 return;
 }
 
-spr_write_ureg(ctx, sprn, gprn);
+spr_write_PMC(ctx, sprn + 0x10, gprn);
 }
 
 void spr_write_PMC56_ureg(DisasContext *ctx, int sprn, int gprn)
@@ -280,4 +300,9 @@ void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn)
 {
 spr_write_generic(ctx, sprn, gprn);
 }
+
+void spr_write_PMC(DisasContext *ctx, int sprn, int gprn)
+{
+spr_write_generic(ctx, sprn, gprn);
+}
 #endif /* defined(TARGET_PP

[PATCH v8 08/10] PPC64/TCG: Implement 'rfebb' instruction

2021-11-25 Thread Daniel Henrique Barboza
An Event-Based Branch (EBB) allows applications to change the NIA when an
event-based exception occurs. Event-based exceptions are enabled by
setting the Branch Event Status and Control Register (BESCR). If the
event-based exception is enabled when the exception occurs, an EBB
happens.

The following operations happen during an EBB:

- Global Enable (GE) bit of BESCR is set to 0;
- bits 0-61 of the Event-Based Branch Return Register (EBBRR) are set
to the effective address of the NIA that would have executed if the EBB
didn't happen;
- Instruction fetch and execution will continue at the effective address
contained in the Event-Based Branch Handler Register (EBBHR).

The EBB Handler will process the event and then execute the Return From
Event-Based Branch (rfebb) instruction. rfebb sets BESCR_GE and then
redirects execution to the address pointed in EBBRR. This process is
described in the PowerISA v3.1, Book II, Chapter 6 [1].

This patch implements the rfebb instruction. Descriptions of all
relevant BESCR bits are also added - this patch is only using BESCR_GE,
but the next patches will use the remaining bits.

[1] https://wiki.raptorcs.com/w/images/f/f5/PowerISA_public.v3.1.pdf

Reviewed-by: Matheus Ferst 
Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h   | 13 ++
 target/ppc/excp_helper.c   | 31 
 target/ppc/helper.h|  1 +
 target/ppc/insn32.decode   |  5 
 target/ppc/translate.c |  2 ++
 target/ppc/translate/branch-impl.c.inc | 33 ++
 6 files changed, 85 insertions(+)
 create mode 100644 target/ppc/translate/branch-impl.c.inc

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 993884164f..edb4488176 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -393,6 +393,19 @@ typedef enum {
 /* PMU uses CTRL_RUN to sample PM_RUN_INST_CMPL */
 #define CTRL_RUN PPC_BIT(63)
 
+/* EBB/BESCR bits */
+/* Global Enable */
+#define BESCR_GE PPC_BIT(0)
+/* External Event-based Exception Enable */
+#define BESCR_EE PPC_BIT(30)
+/* Performance Monitor Event-based Exception Enable */
+#define BESCR_PME PPC_BIT(31)
+/* External Event-based Exception Occurred */
+#define BESCR_EEO PPC_BIT(62)
+/* Performance Monitor Event-based Exception Occurred */
+#define BESCR_PMEO PPC_BIT(63)
+#define BESCR_INVALID PPC_BITMASK(32, 33)
+
 /* LPCR bits */
 #define LPCR_VPM0 PPC_BIT(0)
 #define LPCR_VPM1 PPC_BIT(1)
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 17607adbe4..7ead32279c 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1250,6 +1250,37 @@ void helper_hrfid(CPUPPCState *env)
 }
 #endif
 
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+void helper_rfebb(CPUPPCState *env, target_ulong s)
+{
+target_ulong msr = env->msr;
+
+/*
+ * Handling of BESCR bits 32:33 according to PowerISA v3.1:
+ *
+ * "If BESCR 32:33 != 0b00 the instruction is treated as if
+ *  the instruction form were invalid."
+ */
+if (env->spr[SPR_BESCR] & BESCR_INVALID) {
+raise_exception_err(env, POWERPC_EXCP_PROGRAM,
+POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL);
+}
+
+env->nip = env->spr[SPR_EBBRR];
+
+/* Switching to 32-bit ? Crop the nip */
+if (!msr_is_64bit(env, msr)) {
+env->nip = (uint32_t)env->spr[SPR_EBBRR];
+}
+
+if (s) {
+env->spr[SPR_BESCR] |= BESCR_GE;
+} else {
+env->spr[SPR_BESCR] &= ~BESCR_GE;
+}
+}
+#endif
+
 /*/
 /* Embedded PowerPC specific helpers */
 void helper_40x_rfci(CPUPPCState *env)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d8a23e054a..b0535b389b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -18,6 +18,7 @@ DEF_HELPER_2(pminsn, void, env, i32)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(rfscv, void, env)
 DEF_HELPER_1(hrfid, void, env)
+DEF_HELPER_2(rfebb, void, env, tl)
 DEF_HELPER_2(store_lpcr, void, env, tl)
 DEF_HELPER_2(store_pcr, void, env, tl)
 DEF_HELPER_2(store_mmcr0, void, env, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index e135b8aba4..6cad783dde 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -427,3 +427,8 @@ XXSPLTW 00 . ---.. . 010100100 . .  @XX2
 ## VSX Vector Load Special Value Instruction
 
 LXVKQ   00 . 1 . 0101101000 .   @X_uim5
+
+### rfebb
+&XL_s   s:uint8_t
+@XL_s   ..-- s:1 .. -   &XL_s
+RFEBB   010011-- .   0010010010 -   @XL_s
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index d0e361a9d1..d643a83a51 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -7467,6 +7467,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, 
arg_PLS_D *a)
 
 #include "translat

[PATCH v8 09/10] target/ppc: PMU Event-Based exception support

2021-11-25 Thread Daniel Henrique Barboza
From: Gustavo Romero 

Following up the rfebb implementation, this patch adds the EBB exception
support that is triggered by Performance Monitor alerts. This exception
occurs when an enabled PMU condition or event happens and both MMCR0_EBE
and BESCR_PME are set.

The supported PM alerts will consist of counter negative conditions of
the PMU counters. This will be achieved by a timer mechanism that will
predict when a counter becomes negative. The PMU timer callback will set
the appropriate bits in MMCR0 and fire a PMC interrupt. The EBB
exception code will then set the appropriate BESCR bits, set the next
instruction pointer to the address pointed by the return register
(SPR_EBBRR), and redirect execution to the handler (pointed by
SPR_EBBHR).

CC: Gustavo Romero 
Signed-off-by: Gustavo Romero 
Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h |  5 -
 target/ppc/excp_helper.c | 29 +
 target/ppc/power8-pmu.c  | 35 +--
 3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index edb4488176..28ae904d76 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -129,8 +129,10 @@ enum {
 /* ISA 3.00 additions */
 POWERPC_EXCP_HVIRT= 101,
 POWERPC_EXCP_SYSCALL_VECTORED = 102, /* scv exception 
*/
+POWERPC_EXCP_EBB = 103, /* Event-based branch exception  */
+
 /* EOL   */
-POWERPC_EXCP_NB   = 103,
+POWERPC_EXCP_NB   = 104,
 /* QEMU exceptions: special cases we want to stop translation*/
 POWERPC_EXCP_SYSCALL_USER = 0x203, /* System call in user mode only  */
 };
@@ -2453,6 +2455,7 @@ enum {
 PPC_INTERRUPT_HMI,/* Hypervisor Maintenance interrupt*/
 PPC_INTERRUPT_HDOORBELL,  /* Hypervisor Doorbell interrupt*/
 PPC_INTERRUPT_HVIRT,  /* Hypervisor virtualization interrupt  */
+PPC_INTERRUPT_PMC,/* Hypervisor virtualization interrupt  */
 };
 
 /* Processor Compatibility mask (PCR) */
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 7ead32279c..a26d266fe6 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -799,6 +799,23 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 cpu_abort(cs, "Non maskable external exception "
   "is not implemented yet !\n");
 break;
+case POWERPC_EXCP_EBB:   /* Event-based branch exception */
+if ((env->spr[SPR_FSCR] & (1ull << FSCR_EBB)) &&
+(env->spr[SPR_BESCR] & BESCR_GE) &&
+(env->spr[SPR_BESCR] & BESCR_PME)) {
+target_ulong nip;
+
+env->spr[SPR_BESCR] &= ~BESCR_GE;   /* Clear GE */
+env->spr[SPR_BESCR] |= BESCR_PMEO;  /* Set PMEO */
+env->spr[SPR_EBBRR] = env->nip; /* Save NIP for rfebb insn */
+nip = env->spr[SPR_EBBHR];  /* EBB handler */
+powerpc_set_excp_state(cpu, nip, env->msr);
+}
+/*
+ * This interrupt is handled by userspace. No need
+ * to proceed.
+ */
+return;
 default:
 excp_invalid:
 cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
@@ -1046,6 +1063,18 @@ static void ppc_hw_interrupt(CPUPPCState *env)
 powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_THERM);
 return;
 }
+/* PMC -> Event-based branch exception */
+if (env->pending_interrupts & (1 << PPC_INTERRUPT_PMC)) {
+/*
+ * Performance Monitor event-based exception can only
+ * occur in problem state.
+ */
+if (msr_pr == 1) {
+env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PMC);
+powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_EBB);
+return;
+}
+}
 }
 
 if (env->resume_as_sreset) {
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 98797f0b2f..330e0d2ae8 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -290,6 +290,15 @@ void helper_store_pmc(CPUPPCState *env, uint32_t sprn, 
uint64_t value)
 pmc_update_overflow_timer(env, sprn);
 }
 
+static void pmu_delete_timers(CPUPPCState *env)
+{
+int i;
+
+for (i = 0; i < PMU_TIMERS_NUM; i++) {
+timer_del(env->pmu_cyc_overflow_timers[i]);
+}
+}
+
 static void fire_PMC_interrupt(PowerPCCPU *cpu)
 {
 CPUPPCState *env = &cpu->env;
@@ -298,8 +307,30 @@ static void fire_PMC_interrupt(PowerPCCPU *cpu)
 return;
 }
 
-/* PMC interrupt not implemented yet */
-return;
+pmu_update_cycles(env);
+
+if (env->spr[SPR_POWER_MMCR0] & MMCR0_FCECE) {
+env->spr[SPR_POWER_MMCR0] &= ~MMCR0_FCECE;
+env->spr[SPR_POWER_MMCR0] |= MMCR0_FC;
+
+/*

[PATCH v8 07/10] target/ppc/power8-pmu.c: add PM_RUN_INST_CMPL (0xFA) event

2021-11-25 Thread Daniel Henrique Barboza
PM_RUN_INST_CMPL, instructions completed with the run latch set, is
the architected PowerISA v3.1 event defined with PMC4SEL = 0xFA.

Implement it by checking for the CTRL RUN bit before incrementing the
counter. To make this work properly we also need to force a new
translation block each time SPR_CTRL is written. A small tweak in
pmu_increment_insns() is then needed to only increment this event
if the thread has the run latch.

Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h|  4 
 target/ppc/cpu_init.c   |  2 +-
 target/ppc/power8-pmu.c | 24 ++--
 target/ppc/spr_tcg.h|  1 +
 target/ppc/translate.c  | 12 
 5 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 38cd2b5c43..993884164f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -304,6 +304,7 @@ typedef enum {
 PMU_EVENT_INACTIVE,
 PMU_EVENT_CYCLES,
 PMU_EVENT_INSTRUCTIONS,
+PMU_EVENT_INSN_RUN_LATCH,
 } PMUEventType;
 
 /*/
@@ -389,6 +390,9 @@ typedef enum {
 #define MMCR1_PMC4SEL_START 56
 #define MMCR1_PMC4EVT_EXTR (64 - MMCR1_PMC4SEL_START - MMCR1_EVT_SIZE)
 
+/* PMU uses CTRL_RUN to sample PM_RUN_INST_CMPL */
+#define CTRL_RUN PPC_BIT(63)
+
 /* LPCR bits */
 #define LPCR_VPM0 PPC_BIT(0)
 #define LPCR_VPM1 PPC_BIT(1)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 2d72dde26d..ecce4c7c1e 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6749,7 +6749,7 @@ static void register_book3s_ctrl_sprs(CPUPPCState *env)
 {
 spr_register(env, SPR_CTRL, "SPR_CTRL",
  SPR_NOACCESS, SPR_NOACCESS,
- SPR_NOACCESS, &spr_write_generic,
+ SPR_NOACCESS, &spr_write_CTRL,
  0x);
 spr_register(env, SPR_UCTRL, "SPR_UCTRL",
  &spr_read_ureg, SPR_NOACCESS,
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 59d0def79d..98797f0b2f 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -96,6 +96,15 @@ static PMUEventType pmc_get_event(CPUPPCState *env, int sprn)
 evt_type = PMU_EVENT_CYCLES;
 }
 break;
+case 0xFA:
+/*
+ * PMC4SEL = 0xFA is the "instructions completed
+ * with run latch set" event.
+ */
+if (sprn == SPR_POWER_PMC4) {
+evt_type = PMU_EVENT_INSN_RUN_LATCH;
+}
+break;
 case 0xFE:
 /*
  * PMC1SEL = 0xFE is the architected PowerISA v3.1
@@ -119,11 +128,22 @@ static bool pmu_increment_insns(CPUPPCState *env, 
uint32_t num_insns)
 
 /* PMC6 never counts instructions */
 for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC5; sprn++) {
-if (pmc_get_event(env, sprn) != PMU_EVENT_INSTRUCTIONS) {
+PMUEventType evt_type = pmc_get_event(env, sprn);
+bool insn_event = evt_type == PMU_EVENT_INSTRUCTIONS ||
+  evt_type == PMU_EVENT_INSN_RUN_LATCH;
+
+if (pmc_is_inactive(env, sprn) || !insn_event) {
 continue;
 }
 
-env->spr[sprn] += num_insns;
+if (evt_type == PMU_EVENT_INSTRUCTIONS) {
+env->spr[sprn] += num_insns;
+}
+
+if (evt_type == PMU_EVENT_INSN_RUN_LATCH &&
+env->spr[SPR_CTRL] & CTRL_RUN) {
+env->spr[sprn] += num_insns;
+}
 
 if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL &&
 pmc_has_overflow_enabled(env, sprn)) {
diff --git a/target/ppc/spr_tcg.h b/target/ppc/spr_tcg.h
index 1d6521eedc..f98d97c0ba 100644
--- a/target/ppc/spr_tcg.h
+++ b/target/ppc/spr_tcg.h
@@ -28,6 +28,7 @@ void spr_write_generic(DisasContext *ctx, int sprn, int gprn);
 void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn);
 void spr_write_MMCR1(DisasContext *ctx, int sprn, int gprn);
 void spr_write_PMC(DisasContext *ctx, int sprn, int gprn);
+void spr_write_CTRL(DisasContext *ctx, int sprn, int gprn);
 void spr_read_xer(DisasContext *ctx, int gprn, int sprn);
 void spr_write_xer(DisasContext *ctx, int sprn, int gprn);
 void spr_read_lr(DisasContext *ctx, int gprn, int sprn);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index ccc83d0603..d0e361a9d1 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -403,6 +403,18 @@ void spr_write_generic(DisasContext *ctx, int sprn, int 
gprn)
 spr_store_dump_spr(sprn);
 }
 
+void spr_write_CTRL(DisasContext *ctx, int sprn, int gprn)
+{
+spr_write_generic(ctx, sprn, gprn);
+
+/*
+ * SPR_CTRL writes must force a new translation block,
+ * allowing the PMU to calculate the run latch events with
+ * more accuracy.
+ */
+ctx->base.is_jmp = DISAS_EXIT_UPDATE;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 void spr_write_generic32(DisasContext *ctx, int sprn, int gprn)
 {
-- 
2.31.1




[PATCH v8 05/10] target/ppc: enable PMU counter overflow with cycle events

2021-11-25 Thread Daniel Henrique Barboza
The PowerISA v3.1 defines that if the proper bits are set (MMCR0_PMC1CE
for PMC1 and MMCR0_PMCjCE for the remaining PMCs), counter negative
conditions are enabled. This means that if the counter value overflows
(i.e. exceeds 0x8000) a performance monitor alert will occur. This alert
can trigger an event-based exception (to be implemented in the next patches)
if the MMCR0_EBE bit is set.

For now, overflowing the counter when the PMC is counting cycles will
just trigger a performance monitor alert. This is done by starting the
overflow timer to expire in the moment the overflow would be occurring. The
timer will call fire_PMC_interrupt() (via cpu_ppc_pmu_timer_cb) which will
trigger the PMU alert and, if the conditions are met, an EBB exception.

Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/cpu.h|  2 ++
 target/ppc/power8-pmu.c | 80 +
 2 files changed, 82 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 9c732953f0..9b41b022e2 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -364,6 +364,8 @@ typedef enum {
 #define MMCR0_PMCC   PPC_BITMASK(44, 45) /* PMC Control */
 #define MMCR0_FC14   PPC_BIT(58) /* PMC Freeze Counters 1-4 bit */
 #define MMCR0_FC56   PPC_BIT(59) /* PMC Freeze Counters 5-6 bit */
+#define MMCR0_PMC1CE PPC_BIT(48) /* MMCR0 PMC1 Condition Enabled */
+#define MMCR0_PMCjCE PPC_BIT(49) /* MMCR0 PMCj Condition Enabled */
 /* MMCR0 userspace r/w mask */
 #define MMCR0_UREG_MASK (MMCR0_FC | MMCR0_PMAO | MMCR0_PMAE)
 /* MMCR2 userspace r/w mask */
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index acdaee7459..01e0b9b8fc 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -23,6 +23,8 @@
 
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
 
+#define PMC_COUNTER_NEGATIVE_VAL 0x8000UL
+
 static bool pmc_is_inactive(CPUPPCState *env, int sprn)
 {
 if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
@@ -36,6 +38,15 @@ static bool pmc_is_inactive(CPUPPCState *env, int sprn)
 return env->spr[SPR_POWER_MMCR0] & MMCR0_FC56;
 }
 
+static bool pmc_has_overflow_enabled(CPUPPCState *env, int sprn)
+{
+if (sprn == SPR_POWER_PMC1) {
+return env->spr[SPR_POWER_MMCR0] & MMCR0_PMC1CE;
+}
+
+return env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE;
+}
+
 /*
  * For PMCs 1-4, IBM POWER chips has support for an implementation
  * dependent event, 0x1E, that enables cycle counting. The Linux kernel
@@ -123,6 +134,70 @@ static void pmu_update_cycles(CPUPPCState *env)
 env->pmu_base_time = now;
 }
 
+/*
+ * Helper function to retrieve the cycle overflow timer of the
+ * 'sprn' counter. Given that PMC5 doesn't have a timer, the
+ * amount of timers is less than the total counters and the PMC6
+ * timer is the last of the array.
+ */
+static QEMUTimer *get_cyc_overflow_timer(CPUPPCState *env, int sprn)
+{
+if (sprn == SPR_POWER_PMC5) {
+return NULL;
+}
+
+if (sprn == SPR_POWER_PMC6) {
+return env->pmu_cyc_overflow_timers[PMU_TIMERS_NUM - 1];
+}
+
+return env->pmu_cyc_overflow_timers[sprn - SPR_POWER_PMC1];
+}
+
+static void pmc_update_overflow_timer(CPUPPCState *env, int sprn)
+{
+QEMUTimer *pmc_overflow_timer;
+int64_t timeout;
+
+/* PMC5 does not have an overflow timer */
+if (sprn == SPR_POWER_PMC5) {
+return;
+}
+
+pmc_overflow_timer = get_cyc_overflow_timer(env, sprn);
+
+if (pmc_get_event(env, sprn) != PMU_EVENT_CYCLES ||
+!pmc_has_overflow_enabled(env, sprn)) {
+/* Overflow timer is not needed for this counter */
+timer_del(pmc_overflow_timer);
+return;
+}
+
+if (env->spr[sprn] >= PMC_COUNTER_NEGATIVE_VAL) {
+timeout =  0;
+} else {
+timeout = PMC_COUNTER_NEGATIVE_VAL - env->spr[sprn];
+}
+
+/*
+ * Use timer_mod_anticipate() because an overflow timer might
+ * be already running for this PMC.
+ */
+timer_mod_anticipate(pmc_overflow_timer, env->pmu_base_time + timeout);
+}
+
+static void pmu_update_overflow_timers(CPUPPCState *env)
+{
+int sprn;
+
+/*
+ * Scroll through all PMCs and start counter overflow timers for
+ * PM_CYC events, if needed.
+ */
+for (sprn = SPR_POWER_PMC1; sprn <= SPR_POWER_PMC6; sprn++) {
+pmc_update_overflow_timer(env, sprn);
+}
+}
+
 void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
 {
 target_ulong curr_value = env->spr[SPR_POWER_MMCR0];
@@ -143,6 +218,9 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong 
value)
 (curr_FC != new_FC)) {
 hreg_compute_hflags(env);
 }
+
+/* Update cycle overflow timers with the current MMCR0 state */
+pmu_update_overflow_timers(env);
 }
 
 void helper_store_mmcr1(CPUPPCState *env, uint64_t value)
@@ -164,6 +242,8 @@ void helper_store_pmc(CPUPPCState *env, uint32_t sprn, 
uint64_t value)
 pmu_update_cycles(env);
 
 env->spr[sprn] = valu

Re: SEV guest attestation

2021-11-25 Thread Dr. David Alan Gilbert
* Daniel P. Berrangé (berra...@redhat.com) wrote:
> On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > > On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
> > > > Hi,
> > > > 
> > > > We recently discussed a way for remote SEV guest attestation through 
> > > > QEMU.
> > > > My initial approach was to get data needed for attestation through 
> > > > different
> > > > QMP commands (all of which are already available, so no changes required
> > > > there), deriving hashes and certificate data; and collecting all of this
> > > > into a new QMP struct (SevLaunchStart, which would include the VM's 
> > > > policy,
> > > > secret, and GPA) which would need to be upstreamed into QEMU. Once this 
> > > > is
> > > > provided, QEMU would then need to have support for attestation before a 
> > > > VM
> > > > is started. Upon speaking to Dave about this proposal, he mentioned that
> > > > this may not be the best approach, as some situations would render the
> > > > attestation unavailable, such as the instance where a VM is running in a
> > > > cloud, and a guest owner would like to perform attestation via QMP (a 
> > > > likely
> > > > scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
> > > > QMP
> > > > commands, as this could be an issue.
> > > 
> > > As a general point, QMP is a low level QEMU implementation detail,
> > > which is generally expected to be consumed exclusively on the host
> > > by a privileged mgmt layer, which will in turn expose its own higher
> > > level APIs to users or other apps. I would not expect to see QMP
> > > exposed to anything outside of the privileged host layer.
> > > 
> > > We also use the QAPI protocol for QEMU guest agent commmunication,
> > > however, that is a distinct service from QMP on the host. It shares
> > > most infra with QMP but has a completely diffent command set. On the
> > > host it is not consumed inside QEMU, but instead consumed by a
> > > mgmt app like libvirt. 
> > > 
> > > > So I ask, does anyone involved in QEMU's SEV implementation have any 
> > > > input
> > > > on a quality way to perform guest attestation? If so, I'd be interested.
> > > 
> > > I think what's missing is some clearer illustrations of how this
> > > feature is expected to be consumed in some real world application
> > > and the use cases we're trying to solve.
> > > 
> > > I'd like to understand how it should fit in with common libvirt
> > > applications across the different virtualization management
> > > scenarios - eg virsh (command line),  virt-manger (local desktop
> > > GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> > > And of course any non-traditional virt use cases that might be
> > > relevant such as Kata.
> > 
> > That's still not that clear; I know Alice and Sergio have some ideas
> > (cc'd).
> > There's also some standardisation efforts (e.g. 
> > https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> > and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> > ) - that I can't claim to fully understand.
> > However, there are some themes that are emerging:
> > 
> >   a) One use is to only allow a VM to access some private data once we
> > prove it's the VM we expect running in a secure/confidential system
> >   b) (a) normally involves requesting some proof from the VM and then
> > providing it some confidential data/a key if it's OK
> 
> I guess I'm wondering what the threat we're protecting against is,
> and / or which pieces of the stack we can trust ?

Yeh and that varies depending who you speak to.

> eg, if the host has 2 VMs running, we verify the 1st and provide
> its confidental data back to the host, what stops the host giving
> that dat to the 2nd non-verified VM ? 
> 
> Presumably the data has to be encrypted with a key that is uniquely
> tied to this specific boot attempt of the verified VM, and not
> accessible to any other VM, or to future boots of this VM ?

In the SEV/-ES case the attestation is uniquefied by a Nonce I think
and there's some type of session key used (can't remember the details)
and the returning of the key to the VM is encrypted through that same
channel; so you know you're giving the key to the thing you attested.

However, since in SEV/ES you only measure the firmware (and number of
CPUs) all VMs look pretty much identical at that point - distinguishing
them relies either on:
  a) In the GRUB/OVMF case you are relying on the key you return to the
VM successfully decrypting the disk and the embedded Grub being able to
load the kernel/initrd (You attested the embedded Grub, so you trust
it to do that)
  b) In the separate kernel/initrd case you do have the kernel command
line measured as well.

> >   c) RATs splits the problem up:
> > 
> > https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
> > I don't fully understand the split yet, but in

[PATCH v8 10/10] target/ppc/excp_helper.c: EBB handling adjustments

2021-11-25 Thread Daniel Henrique Barboza
The current logic is only considering event-based exceptions triggered
by the performance monitor. This is true now, but we might want to add
support for external event-based exceptions in the future.

Let's make it a bit easier to do so by adding the bit logic that would
happen in case we were dealing with an external event-based exception.

While we're at it, add a few comments explaining why we're setting and
clearing BESCR bits.

Signed-off-by: Daniel Henrique Barboza 
---
 target/ppc/excp_helper.c | 45 ++--
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index a26d266fe6..42e2fee9c8 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -801,14 +801,47 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 break;
 case POWERPC_EXCP_EBB:   /* Event-based branch exception */
 if ((env->spr[SPR_FSCR] & (1ull << FSCR_EBB)) &&
-(env->spr[SPR_BESCR] & BESCR_GE) &&
-(env->spr[SPR_BESCR] & BESCR_PME)) {
+(env->spr[SPR_BESCR] & BESCR_GE)) {
 target_ulong nip;
 
-env->spr[SPR_BESCR] &= ~BESCR_GE;   /* Clear GE */
-env->spr[SPR_BESCR] |= BESCR_PMEO;  /* Set PMEO */
-env->spr[SPR_EBBRR] = env->nip; /* Save NIP for rfebb insn */
-nip = env->spr[SPR_EBBHR];  /* EBB handler */
+/*
+ * If we have Performance Monitor Event-Based exception
+ * enabled (BESCR_PME) and a Performance Monitor alert
+ * occurred (MMCR0_PMAO), clear BESCR_PME and set BESCR_PMEO
+ * (Performance Monitor Event-Based Exception Occurred).
+ *
+ * Software is responsible for clearing both BESCR_PMEO and
+ * MMCR0_PMAO after the event has been handled.
+ */
+if ((env->spr[SPR_BESCR] & BESCR_PME) &&
+(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAO)) {
+env->spr[SPR_BESCR] &= ~BESCR_PME;
+env->spr[SPR_BESCR] |= BESCR_PMEO;
+}
+
+/*
+ * In the case of External Event-Based exceptions, do a
+ * similar logic with BESCR_EE and BESCR_EEO. BESCR_EEO must
+ * also be cleared by software.
+ *
+ * PowerISA 3.1 considers that we'll not have BESCR_PMEO and
+ * BESCR_EEO set at the same time. We can check for BESCR_PMEO
+ * being not set in step above to see if this exception was
+ * trigged by an external event.
+ */
+if (env->spr[SPR_BESCR] & BESCR_EE &&
+!(env->spr[SPR_BESCR] & BESCR_PMEO)) {
+env->spr[SPR_BESCR] &= ~BESCR_EE;
+env->spr[SPR_BESCR] |= BESCR_EEO;
+}
+
+/*
+ * Clear BESCR_GE, save NIP for 'rfebb' and point the
+ * execution to the event handler (SPR_EBBHR) address.
+ */
+env->spr[SPR_BESCR] &= ~BESCR_GE;
+env->spr[SPR_EBBRR] = env->nip;
+nip = env->spr[SPR_EBBHR];
 powerpc_set_excp_state(cpu, nip, env->msr);
 }
 /*
-- 
2.31.1




Re: [PATCH] hw/intc: cannot clear GICv3 ITS CTLR[Enabled] bit

2021-11-25 Thread Alex Bennée


Shashi Mallela  writes:

> When Enabled bit is cleared in GITS_CTLR,ITS feature continues
> to be enabled.This patch fixes the issue.
>
> Signed-off-by: Shashi Mallela 


Tested-by: Alex Bennée 

in so far as it doesn't break the kvm-unit-tests but it also doesn't
solve the:

  irq 55: nobody cared (try booting with the "irqpoll" option)
  CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 5.15.1-ajb #67
  Hardware name: linux,dummy-virt (DT)
  Call trace:
   dump_backtrace+0x0/0x1ac
   show_stack+0x18/0x24
   dump_stack_lvl+0x68/0x84
   dump_stack+0x18/0x34
   __report_bad_irq+0x4c/0x168
   note_interrupt+0x278/0x420
   handle_irq_event+0x84/0x1a0
   handle_fasteoi_irq+0x148/0x214
   handle_domain_irq+0x60/0x90
   gic_handle_irq+0xb0/0x120
   call_on_irq_stack+0x2c/0x5c
   do_interrupt_handler+0x40/0x58
   el1_interrupt+0x30/0x50
   el1h_64_irq_handler+0x18/0x24
   el1h_64_irq+0x78/0x7c
   finish_task_switch.isra.0+0x174/0x290
   __schedule+0x5e0/0x674
   __cond_resched+0x24/0x50
   run_ksoftirqd+0x44/0x5c
   smpboot_thread_fn+0x154/0x180
   kthread+0x118/0x130
   ret_from_fork+0x10/0x20
  handlers:
  [<50cdc74a>] vring_interrupt
  Disabling IRQ #55

that is being seen on newer kernels.

-- 
Alex Bennée



Re: SEV guest attestation

2021-11-25 Thread Sergio Lopez
On Thu, Nov 25, 2021 at 02:44:51PM +0200, Dov Murik wrote:
> [+cc jejb, tobin, jim, hubertus]
> 
> 
> On 25/11/2021 9:14, Sergio Lopez wrote:
> > On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> >> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> >>> On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
>  Hi,
> 
>  We recently discussed a way for remote SEV guest attestation through 
>  QEMU.
>  My initial approach was to get data needed for attestation through 
>  different
>  QMP commands (all of which are already available, so no changes required
>  there), deriving hashes and certificate data; and collecting all of this
>  into a new QMP struct (SevLaunchStart, which would include the VM's 
>  policy,
>  secret, and GPA) which would need to be upstreamed into QEMU. Once this 
>  is
>  provided, QEMU would then need to have support for attestation before a 
>  VM
>  is started. Upon speaking to Dave about this proposal, he mentioned that
>  this may not be the best approach, as some situations would render the
>  attestation unavailable, such as the instance where a VM is running in a
>  cloud, and a guest owner would like to perform attestation via QMP (a 
>  likely
>  scenario), yet a cloud provider cannot simply let anyone pass arbitrary 
>  QMP
>  commands, as this could be an issue.
> >>>
> >>> As a general point, QMP is a low level QEMU implementation detail,
> >>> which is generally expected to be consumed exclusively on the host
> >>> by a privileged mgmt layer, which will in turn expose its own higher
> >>> level APIs to users or other apps. I would not expect to see QMP
> >>> exposed to anything outside of the privileged host layer.
> >>>
> >>> We also use the QAPI protocol for QEMU guest agent commmunication,
> >>> however, that is a distinct service from QMP on the host. It shares
> >>> most infra with QMP but has a completely diffent command set. On the
> >>> host it is not consumed inside QEMU, but instead consumed by a
> >>> mgmt app like libvirt. 
> >>>
>  So I ask, does anyone involved in QEMU's SEV implementation have any 
>  input
>  on a quality way to perform guest attestation? If so, I'd be interested.
> >>>
> >>> I think what's missing is some clearer illustrations of how this
> >>> feature is expected to be consumed in some real world application
> >>> and the use cases we're trying to solve.
> >>>
> >>> I'd like to understand how it should fit in with common libvirt
> >>> applications across the different virtualization management
> >>> scenarios - eg virsh (command line),  virt-manger (local desktop
> >>> GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> >>> And of course any non-traditional virt use cases that might be
> >>> relevant such as Kata.
> >>
> >> That's still not that clear; I know Alice and Sergio have some ideas
> >> (cc'd).
> >> There's also some standardisation efforts (e.g. 
> >> https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> >> and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> >> ) - that I can't claim to fully understand.
> >> However, there are some themes that are emerging:
> >>
> >>   a) One use is to only allow a VM to access some private data once we
> >> prove it's the VM we expect running in a secure/confidential system
> >>   b) (a) normally involves requesting some proof from the VM and then
> >> providing it some confidential data/a key if it's OK
> >>   c) RATs splits the problem up:
> >> 
> >> https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
> >> I don't fully understand the split yet, but in principal there are
> >> at least a few different things:
> >>
> >>   d) The comms layer
> >>   e) Something that validates the attestation message (i.e. the
> >> signatures are valid, the hashes all add up etc)
> >>   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
> >> 8.4 kernel, or that's a valid kernel command line)
> >>   g) Something that holds some secrets that can be handed out if e & f
> >> are happy.
> >>
> >>   There have also been proposals (e.g. Intel HTTPA) for an attestable
> >> connection after a VM is running; that's probably quite different from
> >> (g) but still involves (e) & (f).
> >>
> >> In the simpler setups d,e,f,g probably live in one place; but it's not
> >> clear where they live - for example one scenario says that your cloud
> >> management layer holds some of them, another says you don't trust your
> >> cloud management layer and you keep them separate.
> >>
> >> So I think all we're actually interested in at the moment, is (d) and
> >> (e) and the way for (g) to get the secret back to the guest.
> >>
> >> Unfortunately the comms and the contents of them varies heavily with
> >> technology; in some you're talking to the qemu/hypervisor (SEV/SEV-ES)
> >> while in some

Re: SEV guest attestation

2021-11-25 Thread Dr. David Alan Gilbert
* Sergio Lopez (s...@redhat.com) wrote:
> On Thu, Nov 25, 2021 at 02:44:51PM +0200, Dov Murik wrote:
> > [+cc jejb, tobin, jim, hubertus]
> > 
> > 
> > On 25/11/2021 9:14, Sergio Lopez wrote:
> > > On Wed, Nov 24, 2021 at 06:29:07PM +, Dr. David Alan Gilbert wrote:
> > >> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > >>> On Wed, Nov 24, 2021 at 11:34:16AM -0500, Tyler Fanelli wrote:
> >  Hi,
> > 
> >  We recently discussed a way for remote SEV guest attestation through 
> >  QEMU.
> >  My initial approach was to get data needed for attestation through 
> >  different
> >  QMP commands (all of which are already available, so no changes 
> >  required
> >  there), deriving hashes and certificate data; and collecting all of 
> >  this
> >  into a new QMP struct (SevLaunchStart, which would include the VM's 
> >  policy,
> >  secret, and GPA) which would need to be upstreamed into QEMU. Once 
> >  this is
> >  provided, QEMU would then need to have support for attestation before 
> >  a VM
> >  is started. Upon speaking to Dave about this proposal, he mentioned 
> >  that
> >  this may not be the best approach, as some situations would render the
> >  attestation unavailable, such as the instance where a VM is running in 
> >  a
> >  cloud, and a guest owner would like to perform attestation via QMP (a 
> >  likely
> >  scenario), yet a cloud provider cannot simply let anyone pass 
> >  arbitrary QMP
> >  commands, as this could be an issue.
> > >>>
> > >>> As a general point, QMP is a low level QEMU implementation detail,
> > >>> which is generally expected to be consumed exclusively on the host
> > >>> by a privileged mgmt layer, which will in turn expose its own higher
> > >>> level APIs to users or other apps. I would not expect to see QMP
> > >>> exposed to anything outside of the privileged host layer.
> > >>>
> > >>> We also use the QAPI protocol for QEMU guest agent commmunication,
> > >>> however, that is a distinct service from QMP on the host. It shares
> > >>> most infra with QMP but has a completely diffent command set. On the
> > >>> host it is not consumed inside QEMU, but instead consumed by a
> > >>> mgmt app like libvirt. 
> > >>>
> >  So I ask, does anyone involved in QEMU's SEV implementation have any 
> >  input
> >  on a quality way to perform guest attestation? If so, I'd be 
> >  interested.
> > >>>
> > >>> I think what's missing is some clearer illustrations of how this
> > >>> feature is expected to be consumed in some real world application
> > >>> and the use cases we're trying to solve.
> > >>>
> > >>> I'd like to understand how it should fit in with common libvirt
> > >>> applications across the different virtualization management
> > >>> scenarios - eg virsh (command line),  virt-manger (local desktop
> > >>> GUI), cockpit (single host web mgmt), OpenStack (cloud mgmt), etc.
> > >>> And of course any non-traditional virt use cases that might be
> > >>> relevant such as Kata.
> > >>
> > >> That's still not that clear; I know Alice and Sergio have some ideas
> > >> (cc'd).
> > >> There's also some standardisation efforts (e.g. 
> > >> https://www.potaroo.net/ietf/html/ids-wg-rats.html 
> > >> and https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html
> > >> ) - that I can't claim to fully understand.
> > >> However, there are some themes that are emerging:
> > >>
> > >>   a) One use is to only allow a VM to access some private data once we
> > >> prove it's the VM we expect running in a secure/confidential system
> > >>   b) (a) normally involves requesting some proof from the VM and then
> > >> providing it some confidential data/a key if it's OK
> > >>   c) RATs splits the problem up:
> > >> 
> > >> https://www.ietf.org/archive/id/draft-ietf-rats-architecture-00.html#name-architectural-overview
> > >> I don't fully understand the split yet, but in principal there are
> > >> at least a few different things:
> > >>
> > >>   d) The comms layer
> > >>   e) Something that validates the attestation message (i.e. the
> > >> signatures are valid, the hashes all add up etc)
> > >>   f) Something that knows what hashes to expect (i.e. oh that's a RHEL
> > >> 8.4 kernel, or that's a valid kernel command line)
> > >>   g) Something that holds some secrets that can be handed out if e & f
> > >> are happy.
> > >>
> > >>   There have also been proposals (e.g. Intel HTTPA) for an attestable
> > >> connection after a VM is running; that's probably quite different from
> > >> (g) but still involves (e) & (f).
> > >>
> > >> In the simpler setups d,e,f,g probably live in one place; but it's not
> > >> clear where they live - for example one scenario says that your cloud
> > >> management layer holds some of them, another says you don't trust your
> > >> cloud management layer and you keep them separate.
> > >>
> > >> So I think all we're actually i

[PATCH v2 1/7] accel/tcg: introduce CF_NOIRQ

2021-11-25 Thread Alex Bennée
Here we introduce a new compiler flag to disable the checking of exit
request (icount_decr.u32). This is useful when we want to ensure the
next block cannot be preempted by an asynchronous event.

Suggested-by: Richard Henderson 
Signed-off-by: Alex Bennée 

---
v2
  - split from larger patch
  - reword the check in cpu_handle_interrupt and scope to CF_NOIRQ only
---
 include/exec/exec-all.h   |  1 +
 include/exec/gen-icount.h | 21 +
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 6bb2a0f7ec..35d8e93976 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -503,6 +503,7 @@ struct TranslationBlock {
 #define CF_USE_ICOUNT0x0002
 #define CF_INVALID   0x0004 /* TB is stale. Set with @jmp_lock held */
 #define CF_PARALLEL  0x0008 /* Generate code for a parallel context */
+#define CF_NOIRQ 0x0010 /* Generate an uninterruptible TB */
 #define CF_CLUSTER_MASK  0xff00 /* Top 8 bits are cluster ID */
 #define CF_CLUSTER_SHIFT 24
 
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 610cba58fe..c57204ddad 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -21,7 +21,6 @@ static inline void gen_tb_start(const TranslationBlock *tb)
 {
 TCGv_i32 count;
 
-tcg_ctx->exitreq_label = gen_new_label();
 if (tb_cflags(tb) & CF_USE_ICOUNT) {
 count = tcg_temp_local_new_i32();
 } else {
@@ -42,7 +41,19 @@ static inline void gen_tb_start(const TranslationBlock *tb)
 icount_start_insn = tcg_last_op();
 }
 
-tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
+/*
+ * Emit the check against icount_decr.u32 to see if we should exit
+ * unless we suppress the check with CF_NOIRQ. If we are using
+ * icount and have suppressed interruption the higher level code
+ * should have ensured we don't run more instructions than the
+ * budget.
+ */
+if (tb_cflags(tb) & CF_NOIRQ) {
+tcg_ctx->exitreq_label = NULL;
+} else {
+tcg_ctx->exitreq_label = gen_new_label();
+tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
+}
 
 if (tb_cflags(tb) & CF_USE_ICOUNT) {
 tcg_gen_st16_i32(count, cpu_env,
@@ -74,8 +85,10 @@ static inline void gen_tb_end(const TranslationBlock *tb, 
int num_insns)
tcgv_i32_arg(tcg_constant_i32(num_insns)));
 }
 
-gen_set_label(tcg_ctx->exitreq_label);
-tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED);
+if (tcg_ctx->exitreq_label) {
+gen_set_label(tcg_ctx->exitreq_label);
+tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED);
+}
 }
 
 #endif
-- 
2.30.2




[PATCH v2 6/7] MAINTAINERS: Remove me as a reviewer for the build and test/avocado

2021-11-25 Thread Alex Bennée
From: Willian Rampazzo 

Remove me as a reviewer for the Build and test automation and the
Integration Testing with the Avocado Framework and add Beraldo
Leal.

Signed-off-by: Willian Rampazzo 
Reviewed-by: Beraldo Leal 
Message-Id: <20211122191124.31620-1-willi...@redhat.com>
Signed-off-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20211123205729.2205806-7-alex.ben...@linaro.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d3879aa3c1..8f5156bfa7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3469,7 +3469,7 @@ M: Alex Bennée 
 M: Philippe Mathieu-Daudé 
 M: Thomas Huth 
 R: Wainer dos Santos Moschetta 
-R: Willian Rampazzo 
+R: Beraldo Leal 
 S: Maintained
 F: .github/lockdown.yml
 F: .gitlab-ci.yml
@@ -3507,7 +3507,7 @@ W: https://trello.com/b/6Qi1pxVn/avocado-qemu
 R: Cleber Rosa 
 R: Philippe Mathieu-Daudé 
 R: Wainer dos Santos Moschetta 
-R: Willian Rampazzo 
+R: Beraldo Leal 
 S: Odd Fixes
 F: tests/avocado/
 
-- 
2.30.2




[PATCH for 6.2 v2 0/7] more tcg, plugin, test and build fixes

2021-11-25 Thread Alex Bennée
Hi,

As the release process rolls on here if the current state of my
for-6.2 tree. There are fixes for TCG, plugins, build and test.

For v2 I split the CF_NOIRQ patches and simplified a little bit. I
tested with a custom trace point while using watchpoints on the x86_64
memory TCG test. A proper test case would be nicer.

The following still need review:

 - accel/tcg: suppress IRQ check for special TBs
 - accel/tcg: introduce CF_NOIRQ

Alex Bennée (5):
  accel/tcg: introduce CF_NOIRQ
  accel/tcg: suppress IRQ check for special TBs
  tests/avocado: fix tcg_plugin mem access count test
  plugins/meson.build: fix linker issue with weird paths
  gdbstub: handle a potentially racing TaskState

Philippe Mathieu-Daudé (1):
  MAINTAINERS: Add section for Aarch64 GitLab custom runner

Willian Rampazzo (1):
  MAINTAINERS: Remove me as a reviewer for the build and test/avocado

 include/exec/exec-all.h  |  1 +
 include/exec/gen-icount.h| 21 +
 accel/tcg/cpu-exec.c |  9 +
 accel/tcg/translate-all.c|  2 +-
 gdbstub.c|  2 +-
 softmmu/physmem.c|  2 +-
 MAINTAINERS  | 10 --
 plugins/meson.build  |  4 ++--
 tests/avocado/tcg_plugins.py |  2 +-
 9 files changed, 41 insertions(+), 12 deletions(-)

-- 
2.30.2




[PATCH v2 3/7] tests/avocado: fix tcg_plugin mem access count test

2021-11-25 Thread Alex Bennée
When we cleaned up argument handling the test was missed.

Fixes: 5ae589faad ("tests/plugins/mem: introduce "track" arg and make args not 
positional")
Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20211123205729.2205806-4-alex.ben...@linaro.org>
---
 tests/avocado/tcg_plugins.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py
index 9ca1515c3b..642d2e49e3 100644
--- a/tests/avocado/tcg_plugins.py
+++ b/tests/avocado/tcg_plugins.py
@@ -131,7 +131,7 @@ def test_aarch64_virt_mem_icount(self):
  suffix=".log")
 
 self.run_vm(kernel_path, kernel_command_line,
-"tests/plugin/libmem.so,arg=both", plugin_log.name,
+"tests/plugin/libmem.so,inline=true,callback=true", 
plugin_log.name,
 console_pattern,
 args=('-icount', 'shift=1'))
 
-- 
2.30.2




[PATCH v2 2/7] accel/tcg: suppress IRQ check for special TBs

2021-11-25 Thread Alex Bennée
When we set cpu->cflags_next_tb it is because we want to carefully
control the execution of the next TB. Currently there is a race that
causes the second stage of watchpoint handling to get ignored if an
IRQ is processed before we finish executing the instruction that
triggers the watchpoint. Use the new CF_NOIRQ facility to avoid the
race.

We also suppress IRQs when handling precise self modifying code to
avoid unnecessary bouncing.

Signed-off-by: Alex Bennée 
Cc: Pavel Dovgalyuk 
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/245

---
v2
  - split the CF_NOIRQ implementation
  - only apply CF_NOIRQ for watchpoints/SMC handling
  - minor reword of commit
---
 accel/tcg/cpu-exec.c  | 9 +
 accel/tcg/translate-all.c | 2 +-
 softmmu/physmem.c | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 2d14d02f6c..409ec8c38c 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -721,6 +721,15 @@ static inline bool need_replay_interrupt(int 
interrupt_request)
 static inline bool cpu_handle_interrupt(CPUState *cpu,
 TranslationBlock **last_tb)
 {
+/*
+ * If we have requested custom cflags with CF_NOIRQ we should
+ * skip checking here. Any pending interrupts will get picked up
+ * by the next TB we execute under normal cflags.
+ */
+if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
+return false;
+}
+
 /* Clear the interrupt flag now since we're processing
  * cpu->interrupt_request and cpu->exit_request.
  * Ensure zeroing happens before reading cpu->exit_request or
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bd0bb81d08..1cd06572de 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1738,7 +1738,7 @@ tb_invalidate_phys_page_range__locked(struct 
page_collection *pages,
 if (current_tb_modified) {
 page_collection_unlock(pages);
 /* Force execution of one insn next time.  */
-cpu->cflags_next_tb = 1 | curr_cflags(cpu);
+cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
 mmap_unlock();
 cpu_loop_exit_noexc(cpu);
 }
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 314f8b439c..b43f92e900 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -946,7 +946,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr 
len,
 cpu_loop_exit(cpu);
 } else {
 /* Force execution of one insn next time.  */
-cpu->cflags_next_tb = 1 | CF_LAST_IO | curr_cflags(cpu);
+cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ | 
curr_cflags(cpu);
 mmap_unlock();
 cpu_loop_exit_noexc(cpu);
 }
-- 
2.30.2




[PATCH v2 4/7] plugins/meson.build: fix linker issue with weird paths

2021-11-25 Thread Alex Bennée
Signed-off-by: Alex Bennée 
Tested-by: Stefan Weil 
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/712
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20211123205729.2205806-5-alex.ben...@linaro.org>
---
 plugins/meson.build | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/plugins/meson.build b/plugins/meson.build
index aeb386ebae..b3de57853b 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -2,9 +2,9 @@ plugin_ldflags = []
 # Modules need more symbols than just those in plugins/qemu-plugins.symbols
 if not enable_modules
   if 'CONFIG_HAS_LD_DYNAMIC_LIST' in config_host
-plugin_ldflags = ['-Wl,--dynamic-list=' + (meson.project_build_root() / 
'qemu-plugins-ld.symbols')]
+plugin_ldflags = ['-Wl,--dynamic-list=qemu-plugins-ld.symbols']
   elif 'CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST' in config_host
-plugin_ldflags = ['-Wl,-exported_symbols_list,' + 
(meson.project_build_root() / 'qemu-plugins-ld64.symbols')]
+plugin_ldflags = ['-Wl,-exported_symbols_list,qemu-plugins-ld64.symbols']
   endif
 endif
 
-- 
2.30.2




[PATCH v2 7/7] MAINTAINERS: Add section for Aarch64 GitLab custom runner

2021-11-25 Thread Alex Bennée
From: Philippe Mathieu-Daudé 

Add a MAINTAINERS section to cover the GitLab YAML config file
containing the jobs run on the custom runner sponsored by the
Works On Arm project [*].

[*] https://developer.arm.com/solutions/infrastructure/works-on-arm

Suggested-by: Thomas Huth 
Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Message-Id: <2026163226.2719320-1-f4...@amsat.org>
Message-Id: <20211123205729.2205806-8-alex.ben...@linaro.org>
---
 MAINTAINERS | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8f5156bfa7..006a2293ba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3511,6 +3511,12 @@ R: Beraldo Leal 
 S: Odd Fixes
 F: tests/avocado/
 
+GitLab custom runner (Works On Arm Sponsored)
+M: Alex Bennée 
+M: Philippe Mathieu-Daudé 
+S: Maintained
+F: .gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml
+
 Documentation
 -
 Build system architecture
-- 
2.30.2




Re: [PATCH] hw/intc: cannot clear GICv3 ITS CTLR[Enabled] bit

2021-11-25 Thread Peter Maydell
On Wed, 24 Nov 2021 at 18:22, Shashi Mallela  wrote:
>
> When Enabled bit is cleared in GITS_CTLR,ITS feature continues
> to be enabled.This patch fixes the issue.
>
> Signed-off-by: Shashi Mallela 
> ---
>  hw/intc/arm_gicv3_its.c | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
> index 84bcbb5f56..c929a9cb5c 100644
> --- a/hw/intc/arm_gicv3_its.c
> +++ b/hw/intc/arm_gicv3_its.c
> @@ -896,13 +896,14 @@ static bool its_writel(GICv3ITSState *s, hwaddr offset,
>
>  switch (offset) {
>  case GITS_CTLR:
> -s->ctlr |= (value & ~(s->ctlr));
> -
> -if (s->ctlr & ITS_CTLR_ENABLED) {
> +if (value & R_GITS_CTLR_ENABLED_MASK) {
> +s->ctlr |= ITS_CTLR_ENABLED;
>  extract_table_params(s);
>  extract_cmdq_params(s);
>  s->creadr = 0;
>  process_cmdq(s);
> +} else {
> +s->ctlr &= ~ITS_CTLR_ENABLED;
>  }
>  break;
>  case GITS_CBASER:

The code looks fine, so in that sense
Reviewed-by: Peter Maydell 

It seems odd that we have two different #defines for the
same bit, though (ITS_CTLR_ENABLED and R_GITS_CTLR_ENABLED_MASK).
We should probably standardize on the latter and drop the
former.

thanks
-- PMM



Re: [PATCH] hw/intc: cannot clear GICv3 ITS CTLR[Enabled] bit

2021-11-25 Thread Peter Maydell
On Thu, 25 Nov 2021 at 15:19, Alex Bennée  wrote:
>
>
> Shashi Mallela  writes:
>
> > When Enabled bit is cleared in GITS_CTLR,ITS feature continues
> > to be enabled.This patch fixes the issue.
> >
> > Signed-off-by: Shashi Mallela 
>
>
> Tested-by: Alex Bennée 
>
> in so far as it doesn't break the kvm-unit-tests but it also doesn't
> solve the:
>
>   irq 55: nobody cared (try booting with the "irqpoll" option)

For the fix to that try
https://patchew.org/QEMU/20211124202005.989935-1-peter.mayd...@linaro.org/

-- PMM



[PATCH v2 5/7] gdbstub: handle a potentially racing TaskState

2021-11-25 Thread Alex Bennée
When dealing with multi-threaded userspace programs there is a race
condition with the addition of cpu->opaque (aka TaskState). This is
due to cpu_copy calling cpu_create which updates the global vCPU list.
However the task state isn't set until later. This shouldn't be a
problem because the new thread can't have executed anything yet but
the gdbstub code does liberally iterate through the CPU list in
various places.

This sticking plaster ensures the not yet fully realized vCPU is given
a pid of -1, which should be enough to ensure it doesn't show up
anywhere else.

In the longer term I think the code that manages the association
between vCPUs and attached GDB processes could do with a clean-up and
re-factor.

Signed-off-by: Alex Bennée 
Tested-by: Richard Henderson 
Reviewed-by: Richard Henderson 
Cc: Richard Henderson 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/730
Message-Id: <20211123205729.2205806-6-alex.ben...@linaro.org>
---
 gdbstub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gdbstub.c b/gdbstub.c
index 23baaef40e..141d7bc4ec 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -94,7 +94,7 @@ static inline int cpu_gdb_index(CPUState *cpu)
 {
 #if defined(CONFIG_USER_ONLY)
 TaskState *ts = (TaskState *) cpu->opaque;
-return ts->ts_tid;
+return ts ? ts->ts_tid : -1;
 #else
 return cpu->cpu_index + 1;
 #endif
-- 
2.30.2




Re: SEV guest attestation

2021-11-25 Thread Daniel P . Berrangé
On Thu, Nov 25, 2021 at 03:40:36PM +, Dr. David Alan Gilbert wrote:
> * Sergio Lopez (s...@redhat.com) wrote:
> > On Thu, Nov 25, 2021 at 02:44:51PM +0200, Dov Murik wrote:
> > > 
> > > SEV-ES has pre-launch measurement and secret injection, just like SEV
> > > (except that the measurement includes the initial states of all vcpus,
> > > that is, their VMSAs.  BTW that means that in order to calculate the
> > > measurement the Attestation Server must know exactly how many vcpus are
> > > in the VM).
> > 
> > You need the number of vCPUs and an idea of what their initial state
> > is going to be, to be able to reproduce the same VMSA struct in the
> > Attestation Server.
> > 
> > This may tie the Attestation Server with a particular version of both
> > QEMU and KVM. I haven't checked if configuration changes in QEMU may
> > also have an impact on it.
> 
> That's all OK; I'm expecting the attestation server to be given a whole
> pile of information about the apparent environment to check.

Generally though we try not to let a VM be tied to a specific
version of software. eg use machine types to ensure that the
guest can run on any QEMU version, and get the same environment.
This lets host admin upgrade the host software for bug/security
fixes without negatively impacting users. It'd be nice not to
lose that feature with SEV if possible.

IOW, if there are aspects of the vCPU initial state that might
vary over time with different QEMU versions, should we be looking
to tie that variance into the machine type version.

For KVM changes, this might again come back to the idea of a
"host type version".

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: SEV guest attestation

2021-11-25 Thread Dr. David Alan Gilbert
* Daniel P. Berrangé (berra...@redhat.com) wrote:
> On Thu, Nov 25, 2021 at 03:40:36PM +, Dr. David Alan Gilbert wrote:
> > * Sergio Lopez (s...@redhat.com) wrote:
> > > On Thu, Nov 25, 2021 at 02:44:51PM +0200, Dov Murik wrote:
> > > > 
> > > > SEV-ES has pre-launch measurement and secret injection, just like SEV
> > > > (except that the measurement includes the initial states of all vcpus,
> > > > that is, their VMSAs.  BTW that means that in order to calculate the
> > > > measurement the Attestation Server must know exactly how many vcpus are
> > > > in the VM).
> > > 
> > > You need the number of vCPUs and an idea of what their initial state
> > > is going to be, to be able to reproduce the same VMSA struct in the
> > > Attestation Server.
> > > 
> > > This may tie the Attestation Server with a particular version of both
> > > QEMU and KVM. I haven't checked if configuration changes in QEMU may
> > > also have an impact on it.
> > 
> > That's all OK; I'm expecting the attestation server to be given a whole
> > pile of information about the apparent environment to check.
> 
> Generally though we try not to let a VM to tied to a specific
> version of software. eg use machine types to ensure that the
> guest can run on any QEMU version, and get the same environment.
> This lets host admin upgrade the host software for bug/security
> fixes without negatively impacting users. It'd be nice not to
> loose that feature with SEV if possible.
> 
> IOW, if there are aspects of the vCPU initial state that might
> vary over time with different QEMU versions, should we be looking
> to tie that variance into the machine type version.

It's not tied to a particular version; but you may need to let the
attesting server know what version it's using so that it can check
everything adds up.

Dave

> For KVM changes, this might again come back to the idea fo a
> "host type version".
> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




  1   2   >