date:20140910

[Qemu-devel] [PACTH v4 2/6] target-arm: do not set do_interrupt handlers for ARM and AArch64 user modes

2014-09-10 Thread Ard Biesheuvel

From: Rob Herring 

User mode emulation should never get interrupts and thus should not
use the system emulation exception handler function. Remove the reference,
and '#ifndef CONFIG_USER_ONLY' the function itself as well, so that we can add
system mode only functionality to it.

Signed-off-by: Rob Herring 
Signed-off-by: Ard Biesheuvel 
---
 target-arm/cpu.c| 2 +-
 target-arm/cpu64.c  | 2 ++
 target-arm/helper-a64.c | 3 +++
 target-arm/helper.c | 5 -
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index b4c06c17cf87..55479ec8b226 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -1043,7 +1043,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void 
*data)
 
 cc->class_by_name = arm_cpu_class_by_name;
 cc->has_work = arm_cpu_has_work;
-cc->do_interrupt = arm_cpu_do_interrupt;
 cc->dump_state = arm_cpu_dump_state;
 cc->set_pc = arm_cpu_set_pc;
 cc->gdb_read_register = arm_cpu_gdb_read_register;
@@ -1051,6 +1050,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void 
*data)
 #ifdef CONFIG_USER_ONLY
 cc->handle_mmu_fault = arm_cpu_handle_mmu_fault;
 #else
+cc->do_interrupt = arm_cpu_do_interrupt;
 cc->get_phys_page_debug = arm_cpu_get_phys_page_debug;
 cc->vmsd = &vmstate_arm_cpu;
 #endif
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index aa42803959be..9f88b9f4eea0 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -196,7 +196,9 @@ static void aarch64_cpu_class_init(ObjectClass *oc, void 
*data)
 {
 CPUClass *cc = CPU_CLASS(oc);
 
+#if !defined(CONFIG_USER_ONLY)
 cc->do_interrupt = aarch64_cpu_do_interrupt;
+#endif
 cc->set_pc = aarch64_cpu_set_pc;
 cc->gdb_read_register = aarch64_cpu_gdb_read_register;
 cc->gdb_write_register = aarch64_cpu_gdb_write_register;
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 2e9ef64786ae..89b913ee9396 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -438,6 +438,8 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, 
uint32_t bytes)
 return crc32c(acc, buf, bytes) ^ 0xffffffff;
 }
 
+#if !defined(CONFIG_USER_ONLY)
+
 /* Handle a CPU exception.  */
 void aarch64_cpu_do_interrupt(CPUState *cs)
 {
@@ -512,3 +514,4 @@ void aarch64_cpu_do_interrupt(CPUState *cs)
 env->pc = addr;
 cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
 }
+#endif
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 2b95f33872cb..9c129c8b080c 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -3237,11 +3237,6 @@ uint32_t HELPER(rbit)(uint32_t x)
 
 #if defined(CONFIG_USER_ONLY)
 
-void arm_cpu_do_interrupt(CPUState *cs)
-{
-cs->exception_index = -1;
-}
-
 int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw,
  int mmu_idx)
 {
-- 
1.8.3.2

[Qemu-devel] [PACTH v4 1/6] target-arm: add powered off cpu state

2014-09-10 Thread Ard Biesheuvel

From: Rob Herring 

Add tracking of cpu power state in order to support powering off of
cores in system emulation. The initial state is determined by the
start-powered-off QOM property.

Signed-off-by: Rob Herring 
Reviewed-by: Peter Maydell 
Signed-off-by: Ard Biesheuvel 
---
 target-arm/cpu-qom.h | 2 ++
 target-arm/cpu.c | 7 ++-
 target-arm/machine.c | 5 +++--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 07f3c9e86639..eae0a7b9c908 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -98,6 +98,8 @@ typedef struct ARMCPU {
 
 /* Should CPU start in PSCI powered-off state? */
 bool start_powered_off;
+/* CPU currently in PSCI powered-off state */
+bool powered_off;
 
 /* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or
  * QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type.
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 8199f32e3267..b4c06c17cf87 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -40,7 +40,9 @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value)
 
 static bool arm_cpu_has_work(CPUState *cs)
 {
-return cs->interrupt_request &
+ARMCPU *cpu = ARM_CPU(cs);
+
+return !cpu->powered_off && cs->interrupt_request &
 (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB);
 }
 
@@ -91,6 +93,9 @@ static void arm_cpu_reset(CPUState *s)
 env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
 env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
 
+cpu->powered_off = cpu->start_powered_off;
+s->halted = cpu->start_powered_off;
+
 if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
 env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
 }
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 3bcc7cc833e0..63329ebd551f 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -218,8 +218,8 @@ static int cpu_post_load(void *opaque, int version_id)
 
 const VMStateDescription vmstate_arm_cpu = {
 .name = "cpu",
-.version_id = 20,
-.minimum_version_id = 20,
+.version_id = 21,
+.minimum_version_id = 21,
 .pre_save = cpu_pre_save,
 .post_load = cpu_post_load,
 .fields = (VMStateField[]) {
@@ -259,6 +259,7 @@ const VMStateDescription vmstate_arm_cpu = {
 VMSTATE_UINT64(env.exception.vaddress, ARMCPU),
 VMSTATE_TIMER(gt_timer[GTIMER_PHYS], ARMCPU),
 VMSTATE_TIMER(gt_timer[GTIMER_VIRT], ARMCPU),
+VMSTATE_BOOL(powered_off, ARMCPU),
 VMSTATE_END_OF_LIST()
 },
 .subsections = (VMStateSubsection[]) {
-- 
1.8.3.2

[Qemu-devel] [PACTH v4 3/6] target-arm: add hvc and smc exception emulation handling infrastructure

2014-09-10 Thread Ard Biesheuvel

From: Rob Herring 

Add the infrastructure to handle and emulate hvc and smc exceptions.
This will enable emulation of things such as PSCI calls. This commit
does not change the behavior and will exit with unknown exception.

Signed-off-by: Rob Herring 
Signed-off-by: Ard Biesheuvel 
---
 target-arm/cpu-qom.h   |  3 +++
 target-arm/cpu.h   |  2 ++
 target-arm/helper-a64.c| 16 +
 target-arm/helper.c| 23 ++
 target-arm/internals.h | 20 
 target-arm/translate-a64.c | 35 ---
 target-arm/translate.c | 59 +-
 target-arm/translate.h |  2 ++
 8 files changed, 140 insertions(+), 20 deletions(-)

diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index eae0a7b9c908..104cc67e82d2 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -192,6 +192,9 @@ extern const struct VMStateDescription vmstate_arm_cpu;
 void register_cp_regs_for_features(ARMCPU *cpu);
 void init_cpreg_list(ARMCPU *cpu);
 
+bool arm_cpu_do_hvc(CPUState *cs);
+bool arm_cpu_do_smc(CPUState *cs);
+
 void arm_cpu_do_interrupt(CPUState *cpu);
 void arm_v7m_cpu_do_interrupt(CPUState *cpu);
 
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 51bedc826299..d235929f4c12 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -51,6 +51,8 @@
 #define EXCP_EXCEPTION_EXIT  8   /* Return from v7M exception.  */
 #define EXCP_KERNEL_TRAP 9   /* Jumped to kernel code page.  */
 #define EXCP_STREX  10
+#define EXCP_HVC11
+#define EXCP_SMC12
 
 #define ARMV7M_EXCP_RESET   1
 #define ARMV7M_EXCP_NMI 2
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 89b913ee9396..1f8072ab141b 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -485,6 +485,22 @@ void aarch64_cpu_do_interrupt(CPUState *cs)
 case EXCP_FIQ:
 addr += 0x100;
 break;
+case EXCP_HVC:
+if (arm_cpu_do_hvc(cs)) {
+return;
+}
+/* Treat as unallocated encoding */
+qemu_log_mask(LOG_GUEST_ERROR, "HVC not implemented on this CPU\n");
+env->exception.syndrome = syn_uncategorized();
+break;
+case EXCP_SMC:
+if (arm_cpu_do_smc(cs)) {
+return;
+}
+/* Treat as unallocated encoding */
+qemu_log_mask(LOG_GUEST_ERROR, "SMC not implemented on this CPU\n");
+env->exception.syndrome = syn_uncategorized();
+break;
 default:
 cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
 }
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 9c129c8b080c..64bd49ecdaf9 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -3492,6 +3492,16 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
 env->thumb = addr & 1;
 }
 
+bool arm_cpu_do_hvc(CPUState *cs)
+{
+return false;
+}
+
+bool arm_cpu_do_smc(CPUState *cs)
+{
+return false;
+}
+
 /* Handle a CPU exception.  */
 void arm_cpu_do_interrupt(CPUState *cs)
 {
@@ -3508,6 +3518,19 @@ void arm_cpu_do_interrupt(CPUState *cs)
 
 /* TODO: Vectored interrupt controller.  */
 switch (cs->exception_index) {
+case EXCP_HVC:
+if (arm_cpu_do_hvc(cs)) {
+return;
+}
+qemu_log_mask(LOG_GUEST_ERROR, "HVC not implemented on this CPU\n");
+goto hvc_unallocated;
+case EXCP_SMC:
+if (arm_cpu_do_smc(cs)) {
+return;
+}
+qemu_log_mask(LOG_GUEST_ERROR, "SMC not implemented on this CPU\n");
+hvc_unallocated:
+/* Fall through -- treat as unallocated encoding */
 case EXCP_UDEF:
 new_mode = ARM_CPU_MODE_UND;
 addr = 0x04;
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 53c2e3cf3e7e..caab98e6b508 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -210,6 +210,26 @@ static inline uint32_t syn_aa32_svc(uint32_t imm16, bool 
is_thumb)
 | (is_thumb ? 0 : ARM_EL_IL);
 }
 
+static inline uint32_t syn_aa64_hvc(uint32_t imm16)
+{
+return (EC_AA64_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_hvc(uint32_t imm16)
+{
+return (EC_AA32_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa64_smc(uint32_t imm16)
+{
+return (EC_AA64_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_smc(void)
+{
+return (EC_AA32_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
 static inline uint32_t syn_aa64_bkpt(uint32_t imm16)
 {
 return (EC_AA64_BKPT << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 8e66b6c97282..e5fb775c4a50 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1473,20 +1473,37 @@ static void disas_exc(DisasContext *s, uint32_t insn)
 
 switch (opc) {
 case 0:
-/* SVC, H

[Qemu-devel] [PACTH v4 5/6] target-arm: add emulation of PSCI calls for system emulation

2014-09-10 Thread Ard Biesheuvel

From: Rob Herring 

Add support for handling PSCI calls in system emulation. Both version
0.1 and 0.2 of the PSCI spec are supported. Platforms can enable support
by setting the "psci-conduit" QOM property on the cpus to SMC or HVC
emulation and having a PSCI binding in their dtb.

Signed-off-by: Rob Herring 
Signed-off-by: Ard Biesheuvel 
---
 target-arm/Makefile.objs   |   1 +
 target-arm/cpu-qom.h   |   6 ++
 target-arm/cpu.c   |  10 ++-
 target-arm/cpu.h   |   6 ++
 target-arm/helper.c|  12 +++
 target-arm/psci.c  | 183 +
 target-arm/translate-a64.c |   7 +-
 target-arm/translate.h |   2 +
 8 files changed, 222 insertions(+), 5 deletions(-)
 create mode 100644 target-arm/psci.c

diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index dcd167e0d880..9460b409a5a1 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -7,5 +7,6 @@ obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
 obj-y += gdbstub.o
+obj-$(CONFIG_SOFTMMU) += psci.o
 obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o
 obj-y += crypto_helper.o
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 104cc67e82d2..bed7190bae57 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -101,6 +101,11 @@ typedef struct ARMCPU {
 /* CPU currently in PSCI powered-off state */
 bool powered_off;
 
+/* PSCI conduit used to invoke PSCI methods
+ * 0 - disabled, 1 - smc, 2 - hvc
+ */
+uint32_t psci_conduit;
+
 /* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or
  * QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type.
  */
@@ -192,6 +197,7 @@ extern const struct VMStateDescription vmstate_arm_cpu;
 void register_cp_regs_for_features(ARMCPU *cpu);
 void init_cpreg_list(ARMCPU *cpu);
 
+bool arm_handle_psci(CPUState *cs);
 bool arm_cpu_do_hvc(CPUState *cs);
 bool arm_cpu_do_smc(CPUState *cs);
 
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 55479ec8b226..eba0271a852e 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -268,9 +268,12 @@ static void arm_cpu_initfn(Object *obj)
 cpu->psci_version = 1; /* By default assume PSCI v0.1 */
 cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
 
-if (tcg_enabled() && !inited) {
-inited = true;
-arm_translate_init();
+if (tcg_enabled()) {
+cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
+if (!inited) {
+inited = true;
+arm_translate_init();
+}
 }
 }
 
@@ -1024,6 +1027,7 @@ static const ARMCPUInfo arm_cpus[] = {
 
 static Property arm_cpu_properties[] = {
 DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
+DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
 DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
 DEFINE_PROP_END_OF_LIST()
 };
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index d235929f4c12..a69d69254af4 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -1350,4 +1350,10 @@ static inline void cpu_pc_from_tb(CPUARMState *env, 
TranslationBlock *tb)
 }
 }
 
+enum {
+QEMU_PSCI_CONDUIT_DISABLED = 0,
+QEMU_PSCI_CONDUIT_SMC = 1,
+QEMU_PSCI_CONDUIT_HVC = 2,
+};
+
 #endif
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 64bd49ecdaf9..2df34adc0ff7 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -3494,11 +3494,23 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
 
 bool arm_cpu_do_hvc(CPUState *cs)
 {
+ARMCPU *cpu = ARM_CPU(cs);
+
+if (cpu->psci_conduit == QEMU_PSCI_CONDUIT_HVC) {
+return arm_handle_psci(cs);
+}
+
 return false;
 }
 
 bool arm_cpu_do_smc(CPUState *cs)
 {
+ARMCPU *cpu = ARM_CPU(cs);
+
+if (cpu->psci_conduit == QEMU_PSCI_CONDUIT_SMC) {
+return arm_handle_psci(cs);
+}
+
 return false;
 }
 
diff --git a/target-arm/psci.c b/target-arm/psci.c
new file mode 100644
index 000000000000..7347cbdc17ef
--- /dev/null
+++ b/target-arm/psci.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2014 - Linaro
+ * Author: Rob Herring 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+bool arm_handle_psci(CPUState *cs)
+{
+/*
+ * This functi

[Qemu-devel] [PACTH v4 0/6] ARM: add PSCI 0.2 support in TCG mode

2014-09-10 Thread Ard Biesheuvel

This series adds PSCI support to ARM and AArch64 system emulation when running
in TCG mode. As PSCI calls can be made using either hypervisor call (HVC) or
secure monitor call (SMC) instructions, support is added for handling those
in patch #3 before patch #5 adds the actual PSCI dispatch logic. Patch #6
enables PSCI for the mach-virt platform.

Changes since v3:
- added some R-b's
- remove user mode interrupt handler for AArch32 as well
- update the SMC and HVC handling logic to
  . take feature bits for EL2 and EL3 into account
  . deal with conditional execution state in A32/T32
  . add ARCH() and IS_USER()/current_pl tests where appropriate
- added some missing () in the PSCI constants
- update the PSCI dispatch logic to
  . CPU_ON: take bit 0 of the entry point into account to either set the Thumb
state or return an error, and assert that the onlined CPU is in the same
mode as the calling CPU
  . CPU_OFF: call cpu_loop_exit() directly
  . CPU_SUSPEND: use helper_wfi()
  . follow the PSCI spec and adopt the name 'conduit' to refer to the type of
instruction used to invoke PSCI functions
  . check the conduit in the translation stage to avoid advancing the single
step state machine inadvertently
- remove smp restriction from mach-virt running in TCG mode

Changes since v2:
- added patch #4 to introduce QEMU counterparts of the kernel PSCI constants we
  refer to in the PSCI emulation, this is needed so QEMU can be built in
  environments that don't supply the PSCI header file.

Changes since v1:
- processed first round of review, that was already given when this series was
  sent out by Rob himself back in May


*** BLURB HERE ***

Ard Biesheuvel (1):
  target-arm: add missing PSCI constants needed for PSCI emulation

Rob Herring (5):
  target-arm: add powered off cpu state
  target-arm: do not set do_interrupt handlers for ARM and AArch64 user
modes
  target-arm: add hvc and smc exception emulation handling
infrastructure
  target-arm: add emulation of PSCI calls for system emulation
  arm/virt: enable PSCI emulation support for system emulation

 hw/arm/virt.c  |  82 ++--
 target-arm/Makefile.objs   |   1 +
 target-arm/cpu-qom.h   |  11 +++
 target-arm/cpu.c   |  19 +++--
 target-arm/cpu.h   |   8 ++
 target-arm/cpu64.c |   2 +
 target-arm/helper-a64.c|  19 +
 target-arm/helper.c|  40 --
 target-arm/internals.h |  20 +
 target-arm/kvm-consts.h|  40 ++
 target-arm/machine.c   |   5 +-
 target-arm/psci.c  | 183 +
 target-arm/translate-a64.c |  38 +++---
 target-arm/translate.c |  59 ---
 target-arm/translate.h |   4 +
 15 files changed, 455 insertions(+), 76 deletions(-)
 create mode 100644 target-arm/psci.c

-- 
1.8.3.2

[Qemu-devel] [PACTH v4 4/6] target-arm: add missing PSCI constants needed for PSCI emulation

2014-09-10 Thread Ard Biesheuvel

This adds some PSCI function IDs and symbolic return codes that are needed
to implement PSCI emulation in TCG mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Ard Biesheuvel 
---
 target-arm/kvm-consts.h | 40 
 1 file changed, 40 insertions(+)

diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h
index 091c1267d659..aea12f1bc4c5 100644
--- a/target-arm/kvm-consts.h
+++ b/target-arm/kvm-consts.h
@@ -59,14 +59,21 @@ MISMATCH_CHECK(QEMU_PSCI_0_1_FN_MIGRATE, 
KVM_PSCI_FN_MIGRATE)
 (QEMU_PSCI_0_2_FN_BASE + QEMU_PSCI_0_2_64BIT)
 #define QEMU_PSCI_0_2_FN64(n) (QEMU_PSCI_0_2_FN64_BASE + (n))
 
+#define QEMU_PSCI_0_2_FN_PSCI_VERSION QEMU_PSCI_0_2_FN(0)
 #define QEMU_PSCI_0_2_FN_CPU_SUSPEND QEMU_PSCI_0_2_FN(1)
 #define QEMU_PSCI_0_2_FN_CPU_OFF QEMU_PSCI_0_2_FN(2)
 #define QEMU_PSCI_0_2_FN_CPU_ON QEMU_PSCI_0_2_FN(3)
+#define QEMU_PSCI_0_2_FN_AFFINITY_INFO QEMU_PSCI_0_2_FN(4)
 #define QEMU_PSCI_0_2_FN_MIGRATE QEMU_PSCI_0_2_FN(5)
+#define QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE QEMU_PSCI_0_2_FN(6)
+#define QEMU_PSCI_0_2_FN_MIGRATE_INFO_UP_CPU QEMU_PSCI_0_2_FN(7)
+#define QEMU_PSCI_0_2_FN_SYSTEM_OFF QEMU_PSCI_0_2_FN(8)
+#define QEMU_PSCI_0_2_FN_SYSTEM_RESET QEMU_PSCI_0_2_FN(9)
 
 #define QEMU_PSCI_0_2_FN64_CPU_SUSPEND QEMU_PSCI_0_2_FN64(1)
 #define QEMU_PSCI_0_2_FN64_CPU_OFF QEMU_PSCI_0_2_FN64(2)
 #define QEMU_PSCI_0_2_FN64_CPU_ON QEMU_PSCI_0_2_FN64(3)
+#define QEMU_PSCI_0_2_FN64_AFFINITY_INFO QEMU_PSCI_0_2_FN64(4)
 #define QEMU_PSCI_0_2_FN64_MIGRATE QEMU_PSCI_0_2_FN64(5)
 
 MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_SUSPEND, PSCI_0_2_FN_CPU_SUSPEND)
@@ -77,6 +84,39 @@ MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_SUSPEND, 
PSCI_0_2_FN64_CPU_SUSPEND)
 MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_ON, PSCI_0_2_FN64_CPU_ON)
 MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_MIGRATE, PSCI_0_2_FN64_MIGRATE)
 
+/* PSCI v0.2 return values used by TCG emulation of PSCI */
+
+/* No Trusted OS migration to worry about when offlining CPUs */
+#define QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED2
+
+/* We implement version 0.2 only */
+#define QEMU_PSCI_0_2_RET_VERSION_0_2   2
+
+MISMATCH_CHECK(QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED, PSCI_0_2_TOS_MP)
+MISMATCH_CHECK(QEMU_PSCI_0_2_RET_VERSION_0_2,
+   (PSCI_VERSION_MAJOR(0) | PSCI_VERSION_MINOR(2)))
+
+/* PSCI return values (inclusive of all PSCI versions) */
+#define QEMU_PSCI_RET_SUCCESS 0
+#define QEMU_PSCI_RET_NOT_SUPPORTED   -1
+#define QEMU_PSCI_RET_INVALID_PARAMS  -2
+#define QEMU_PSCI_RET_DENIED  -3
+#define QEMU_PSCI_RET_ALREADY_ON  -4
+#define QEMU_PSCI_RET_ON_PENDING  -5
+#define QEMU_PSCI_RET_INTERNAL_FAILURE-6
+#define QEMU_PSCI_RET_NOT_PRESENT -7
+#define QEMU_PSCI_RET_DISABLED-8
+
+MISMATCH_CHECK(QEMU_PSCI_RET_SUCCESS, PSCI_RET_SUCCESS)
+MISMATCH_CHECK(QEMU_PSCI_RET_NOT_SUPPORTED, PSCI_RET_NOT_SUPPORTED)
+MISMATCH_CHECK(QEMU_PSCI_RET_INVALID_PARAMS, PSCI_RET_INVALID_PARAMS)
+MISMATCH_CHECK(QEMU_PSCI_RET_DENIED, PSCI_RET_DENIED)
+MISMATCH_CHECK(QEMU_PSCI_RET_ALREADY_ON, PSCI_RET_ALREADY_ON)
+MISMATCH_CHECK(QEMU_PSCI_RET_ON_PENDING, PSCI_RET_ON_PENDING)
+MISMATCH_CHECK(QEMU_PSCI_RET_INTERNAL_FAILURE, PSCI_RET_INTERNAL_FAILURE)
+MISMATCH_CHECK(QEMU_PSCI_RET_NOT_PRESENT, PSCI_RET_NOT_PRESENT)
+MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED)
+
 /* Note that KVM uses overlapping values for AArch32 and AArch64
  * target CPU numbers. AArch32 targets:
  */
-- 
1.8.3.2

[Qemu-devel] [PACTH v4 6/6] arm/virt: enable PSCI emulation support for system emulation

2014-09-10 Thread Ard Biesheuvel

From: Rob Herring 

Now that we have PSCI emulation, enable it for the virt platform.
This simplifies the virt machine a bit now that PSCI no longer
needs to be a KVM only feature.

Signed-off-by: Rob Herring 
Signed-off-by: Ard Biesheuvel 
---
 hw/arm/virt.c | 82 +++
 1 file changed, 38 insertions(+), 44 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index d6fffc75bda0..6537b58f2af8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -189,47 +189,48 @@ static void create_fdt(VirtBoardInfo *vbi)
 
 static void fdt_add_psci_node(const VirtBoardInfo *vbi)
 {
+uint32_t cpu_suspend_fn;
+uint32_t cpu_off_fn;
+uint32_t cpu_on_fn;
+uint32_t migrate_fn;
 void *fdt = vbi->fdt;
 ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
 
-/* No PSCI for TCG yet */
-if (kvm_enabled()) {
-uint32_t cpu_suspend_fn;
-uint32_t cpu_off_fn;
-uint32_t cpu_on_fn;
-uint32_t migrate_fn;
-
-qemu_fdt_add_subnode(fdt, "/psci");
-if (armcpu->psci_version == 2) {
-const char comp[] = "arm,psci-0.2\0arm,psci";
-qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
-
-cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
-if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
-cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
-cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
-migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
-} else {
-cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
-cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
-migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
-}
-} else {
-qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
+qemu_fdt_add_subnode(fdt, "/psci");
+if (armcpu->psci_version == 2) {
+const char comp[] = "arm,psci-0.2\0arm,psci";
+qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
 
-cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
-cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
-cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
-migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
+cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
+if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
+cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
+cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
+migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
+} else {
+cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
+cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
+migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
 }
+} else {
+qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
 
-qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
-
-qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
-qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
-qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
-qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
+cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
+cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
+cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
+migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
 }
+
+/* We adopt the PSCI spec's nomenclature, and use 'conduit' to refer
+ * to the instruction that should be used to invoke PSCI functions.
+ * However, the device tree binding uses 'method' instead, so that is
+ * what we should use here.
+ */
+qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
+
+qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
+qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
+qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
+qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
 }
 
 static void fdt_add_timer_nodes(const VirtBoardInfo *vbi)
@@ -467,16 +468,6 @@ static void machvirt_init(MachineState *machine)
 
 vbi->smp_cpus = smp_cpus;
 
-/*
- * Only supported method of starting secondary CPUs is PSCI and
- * PSCI is not yet supported with TCG, so limit smp_cpus to 1
- * if we're not using KVM.
- */
-if (!kvm_enabled() && smp_cpus > 1) {
-error_report("mach-virt: must enable KVM to use multiple CPUs");
-exit(1);
-}
-
 if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
 error_report("mach-virt: cannot model more than 30GB RAM");
 exit(1);
@@ -495,6 +486,9 @@ static void machvirt_init(MachineState *machine)
 }
 cpuobj = object_new(object_class_get_name(oc));
 
+object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_HVC, "psci-conduit",
+NULL);
+
 /* Secondary CPUs start in PSCI powered-down state */
 if (n > 0) {
 object_property_set_bool(cpuobj, tru

Re: [Qemu-devel] [PATCH 0/8] add basic recovery logic to quorum driver

2014-09-10 Thread Liu Yuan

On Sun, Sep 07, 2014 at 05:12:31PM +0200, Benoît Canet wrote:
> The Monday 01 Sep 2014 à 15:43:06 (+0800), Liu Yuan wrote :
> > This patch set mainly adds two pieces of logic to implement device recovery:
> > - notify quorum driver of the broken states from the child driver(s)
> > - dirty track and sync the device after it is repaired
> > 
> > Thus quorum allow VMs to continue while some child devices are broken and 
> > when
> > the child devices are repaired and return back, we sync dirty bits during
> > downtime to keep data consistency.
> > 
> > The recovery logic is based on the driver state bitmap and will sync the 
> > dirty
> > bits with a timeslice window in a coroutine in this primitive implementation.
> > 
> > Simple graph about 2 children with threshold=1 and read-pattern=fifo:
> > (similar to DRBD)
> > 
> > + denote device sync iteration
> > - IO on a single device
> > = IO on two devices
> > 
> >   sync complete, release dirty bitmap
> >  ^
> >  |
> >   -++==
> >  | |
> >  | v
> >  |   device repaired and begin to sync
> >  v
> >device broken, create a dirty bitmap
> > 
> >   This sync logic can take care of nested broken problem, that devices are
> >   broken while in sync. We just start a sync process after the devices are
> >   repaired again and switch the devices from broken to sound only when the 
> > sync
> >   completes.
> > 
> > For read-pattern=quorum mode, it enjoys the recovery logic without any 
> > problem.
> > 
> > Todo:
> > - use aio interface to sync data (multiple transfer in one go)
> > - dynamic slice window to control sync bandwidth more smoothly
> > - add auto-reconnection mechanism to other protocol (if not support yet)
> > - add tests
> > 
> > Cc: Eric Blake 
> > Cc: Benoit Canet 
> > Cc: Kevin Wolf 
> > Cc: Stefan Hajnoczi 
> > 
> > Liu Yuan (8):
> >   block/quorum: initialize qcrs.aiocb for read
> >   block: add driver operation callbacks
> >   block/sheepdog: propagate disconnect/reconnect events to upper driver
> >   block/quorum: add quorum_aio_release() helper
> >   quorum: fix quorum_aio_cancel()
> >   block/quorum: add broken state to BlockDriverState
> >   block: add two helpers
> >   quorum: add basic device recovery logic
> > 
> >  block.c   |  17 +++
> >  block/quorum.c| 324 
> > +-
> >  block/sheepdog.c  |   9 ++
> >  include/block/block.h |   9 ++
> >  include/block/block_int.h |   6 +
> >  trace-events  |   5 +
> >  6 files changed, 336 insertions(+), 34 deletions(-)
> > 
> > -- 
> > 1.9.1
> > 
> 
> Hi liu,
> 
> Have you noticed that your series conflicts with one of Fam's series in the 
> quorum cancel
> function fix patch ?

Not yet, thanks for reminding.

> Could you find an arrangement with Fam so the two patches don't collide 
> anymore ?
> 
> Do you intend to respin your series ?

Yes, I'll rebase the v2 later before more possible reviews.

Thanks
Yuan

Re: [Qemu-devel] [RFC PATCH 0/8] Add Generic PCI host device update

2014-09-10 Thread alvise rigo

Hello Claudio,

Unfortunately I'm still not able to reproduce the problem.

I suspect though that the issue concerns the way the tests were conducted, so
I take this opportunity to ask Rob how many disks you used to test the initial
patch series.
I used something like:

-device lsi53c895a -drive if=scsi,index=1,file=scsi.img \
-drive if=scsi,index=2,file=scsi_2.img ...

to attach the SCSI disks to the guest.

Thank you,
alvise

On Tue, Sep 9, 2014 at 6:35 PM, Claudio Fontana
 wrote:
> On 11.07.2014 11:28, Alvise Rigo wrote:
>> The kernel version is a very recent one: v3.16.0-rc1.
>> Maybe you are right. I will test some older version to see if I'm able
>> to reproduce the issue.
>>
>> Thank you,
>> alvise
>
> Any news on this?
>
> I will be soon in the situation when I can start testing these as the way to 
> get PCI working in OSv for AArch64,
> but will require a bit more time, since there is some more mechanical work 
> involved.
>
> Ciao,
>
> Claudio
>
>> Il 11/07/2014 11:09, Peter Maydell ha scritto:
>>> On 11 July 2014 08:21, Alvise Rigo  wrote:
>>>> This work has been tested attaching several PCI devices to the mach-virt
>>>> platform.  The tested devices are: virtio-blk-pci, virtio-net-pci,
>>>> lsi53c895a and pci-ohci (all attached at the same time).
>>>> Even if the original work was not changed in its core functionalities, I
>>>> couldn't reproduce the malfunctioning of the LSI SCSI mentioned in [1].
>>>> After attaching several qcow2 images, formatting and filling them, I
>>>> didn't notice anything wrong. Am I missing something?
>>>
>>> Interesting. Perhaps the bug was on the kernel side; which
>>> guest kernel version are you using to test with?
>>>
>>> thanks
>>> -- PMM
>>>
>>
>
>
>

Re: [Qemu-devel] [RFC v1 5/6] stm32f205: Add the SoC

2014-09-10 Thread Alistair Francis

On Tue, Sep 9, 2014 at 11:50 PM, Peter Crosthwaite
 wrote:
> On Tue, Sep 9, 2014 at 6:24 PM, Alistair Francis  wrote:
>> This patch adds the stm32f205 SoC. This will be used by the
>> Netduino 2 to create a machine
>>
>> Signed-off-by: Alistair Francis 
>> ---
>>
>>  hw/arm/Makefile.objs   |   2 +-
>>  hw/arm/stm32f205_soc.c | 140 
>> +
>>  include/hw/arm/stm32f205_soc.h |  61 ++
>>  3 files changed, 202 insertions(+), 1 deletion(-)
>>  create mode 100644 hw/arm/stm32f205_soc.c
>>  create mode 100644 include/hw/arm/stm32f205_soc.h
>>
>> diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
>> index 6088e53..673feef 100644
>> --- a/hw/arm/Makefile.objs
>> +++ b/hw/arm/Makefile.objs
>> @@ -2,7 +2,7 @@ obj-y += boot.o collie.o exynos4_boards.o gumstix.o 
>> highbank.o
>>  obj-$(CONFIG_DIGIC) += digic_boards.o
>>  obj-y += integratorcp.o kzm.o mainstone.o musicpal.o nseries.o
>>  obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o
>> -obj-y += tosa.o versatilepb.o vexpress.o virt.o xilinx_zynq.o z2.o
>> +obj-y += tosa.o versatilepb.o vexpress.o virt.o xilinx_zynq.o z2.o 
>> stm32f205_soc.o
>
> New obj-y line.
>
>>
>>  obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o
>>  obj-$(CONFIG_DIGIC) += digic.o
>> diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
>> new file mode 100644
>> index 0000000..da36f61
>> --- /dev/null
>> +++ b/hw/arm/stm32f205_soc.c
>> @@ -0,0 +1,140 @@
>> +/*
>> + * STM32F205xx SoC
>> + *
>> + * Copyright (c) 2014 Alistair Francis 
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a 
>> copy
>> + * of this software and associated documentation files (the "Software"), to 
>> deal
>> + * in the Software without restriction, including without limitation the 
>> rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included 
>> in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
>> FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "hw/arm/stm32f205_soc.h"
>> +
>> +#define FLASH_BASE_ADDRESS 0x08000000
>> +#define FLASH_SIZE 1024
>> +#define SRAM_BASE_ADDRESS 0x20000000
>> +#define SRAM_SIZE 192
>> +
>> +static void stm32f205_soc_initfn(Object *obj)
>> +{
>> +STM32F205State *s = STM32F205_SOC(obj);
>> +int i;
>> +
>> +object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F205_SYSCFG);
>> +qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default());
>> +
>> +for (i = 0; i < 5; i++) {
>> +object_initialize(&s->usart[i], sizeof(s->usart[i]),
>> +  TYPE_STM32F205_USART);
>> +qdev_set_parent_bus(DEVICE(&s->usart[i]), sysbus_get_default());
>> +}
>> +
>> +for (i = 0; i < 4; i++) {
>> +object_initialize(&s->timer[i], sizeof(s->timer[i]),
>> +  TYPE_STM32F205_TIMER);
>> +qdev_set_parent_bus(DEVICE(&s->timer[i]), sysbus_get_default());
>> +}
>> +}
>> +
>> +static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
>> +{
>> +static const uint32_t timer_addr[] = { 0x40000000, 0x40000400,
>> +0x40000800, 0x40000C00 };
>
> You should add a comment about how you only model TIM2-5. This list is
> to grow significantly in a fuller implementation.
>
>> +static const uint32_t usart_addr[] = { 0x40011000, 0x40004400,
>> +0x40004800, 0x40004C00, 0x40005000, 0x40011400 };
>> +
>
> Just put the static consts up top of the file so they are easily
> spotted as self documentation.
>
>> +static const int timer_irq[] = {28, 29, 30, 50};
>> +static const int usart_irq[] = {37, 38, 39, 52, 53, 71, 82, 83};
>> +
>> +STM32F205State *s = STM32F205_SOC(dev_soc);
>> +DeviceState *syscfgdev, *usartdev, *timerdev;
>> +SysBusDevice *syscfgbusdev, *usartbusdev, *timerbusdev;
>> +qemu_irq *pic;;
>> +Error *err = NULL;
>> +int i;
>> +
>> +pic = armv7m_init(get_system_memory(),
>> +  FLASH_SIZE, FLASH_BASE_ADDRESS, SRAM_SIZE, 96,
>> +  s->kernel_filename, s->cpu_model);
>> +
>> +/* System configuration controller */
>> +syscfgdev = DEVICE(&s->syscfg);
>> +syscfgdev->id = "stm32f205xx-syscfg";
>
> Why you need t

Re: [Qemu-devel] [PATCH 0/8] add basic recovery logic to quorum driver

2014-09-10 Thread Liu Yuan

On Wed, Sep 03, 2014 at 12:19:14AM +0200, Benoît Canet wrote:
> The Monday 01 Sep 2014 à 15:43:06 (+0800), Liu Yuan wrote :
> 
> Liu,
> 
> Do you think this could work with qcow2 file backed by NFS servers ?

It depends on which client we use.

If we use Linux NFS client by 'posix file' protocol, I guess we need to hack
raw-posix.c and insert reconnect/disconnect callbacks.

I'm exploring the possibility of QEMU's nfs client block/nfs.c too.

Either way, this should work with all the protocols with proper hacks.

Thanks
Yuan

[Qemu-devel] [RFC PATCH v2] Add HMP command "info memory-devices"

2014-09-10 Thread Zhu Guihua

Provides HMP equivalent of QMP query-memory-devices command.

Signed-off-by: Zhu Guihua 
---
 hmp-commands.hx |  2 ++
 hmp.c   | 43 +++
 hmp.h   |  1 +
 monitor.c   |  7 +++
 4 files changed, 53 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index f859f8d..0b1a4f7 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1778,6 +1778,8 @@ show qdev device model list
 show roms
 @item info tpm
 show the TPM device
+@item info memory-devices
+show the memory devices
 @end table
 ETEXI
 
diff --git a/hmp.c b/hmp.c
index 40a90da..93c1dfe 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1718,3 +1718,46 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict)
 
 qapi_free_MemdevList(memdev_list);
 }
+
+void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
+{
+Error *err = NULL;
+MemoryDeviceInfoList *list = qmp_query_memory_devices(&err);
+MemoryDeviceInfoList *elem = list;
+MemoryDeviceInfo *info;
+PCDIMMDeviceInfo *di;
+int i = 0;
+
+while (elem) {
+info = elem->value;
+
+if (info) {
+switch (info->kind) {
+case MEMORY_DEVICE_INFO_KIND_DIMM:
+di = info->dimm;
+
+monitor_printf(mon, "MemoryDevice %d\n", i);
+monitor_printf(mon, "  %s\n",
+   
MemoryDeviceInfoKind_lookup[MEMORY_DEVICE_INFO_KIND_DIMM]);
+monitor_printf(mon, "  id: %s\n", di->id);
+monitor_printf(mon, "  addr: %" PRId64 "\n", di->addr);
+monitor_printf(mon, "  slot: %" PRId64 "\n", di->slot);
+monitor_printf(mon, "  node: %" PRId64 "\n", di->node);
+monitor_printf(mon, "  size: %" PRId64 "\n", di->size);
+monitor_printf(mon, "  memdev: %s\n", di->memdev);
+monitor_printf(mon, "  hotplugged: %s\n",
+   di->hotplugged ? "true" : "false");
+monitor_printf(mon, "  hotpluggable: %s\n",
+   di->hotpluggable ? "true" : "false");
+break;
+default:
+break;
+}
+}
+
+elem = elem->next;
+i++;
+}
+
+qapi_free_MemoryDeviceInfoList(list);
+}
diff --git a/hmp.h b/hmp.h
index 4fd3c4a..4bb5dca 100644
--- a/hmp.h
+++ b/hmp.h
@@ -94,6 +94,7 @@ void hmp_cpu_add(Monitor *mon, const QDict *qdict);
 void hmp_object_add(Monitor *mon, const QDict *qdict);
 void hmp_object_del(Monitor *mon, const QDict *qdict);
 void hmp_info_memdev(Monitor *mon, const QDict *qdict);
+void hmp_info_memory_devices(Monitor *mon, const QDict *qdict);
 void object_add_completion(ReadLineState *rs, int nb_args, const char *str);
 void object_del_completion(ReadLineState *rs, int nb_args, const char *str);
 void device_add_completion(ReadLineState *rs, int nb_args, const char *str);
diff --git a/monitor.c b/monitor.c
index 34cee74..fe88e0d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2921,6 +2921,13 @@ static mon_cmd_t info_cmds[] = {
 .mhandler.cmd = hmp_info_memdev,
 },
 {
+.name   = "memory-devices",
+.args_type  = "",
+.params = "",
+.help   = "show memory devices",
+.mhandler.cmd = hmp_info_memory_devices,
+},
+{
 .name   = NULL,
 },
 };
-- 
1.9.3

Re: [Qemu-devel] OVMF, Q35 and USB keyboard/mouse

2014-09-10 Thread Laszlo Ersek

On 09/10/14 08:31, Gerd Hoffmann wrote:
>   Hi,
> 
>> So at this point I'm wondering why guests and "info qtree" are contradictory,
>> and whether the combination of OVMF and OS X tickle some qemu usb emulation
>> bug differently than all other (working) combinations ?
> 
> It's due to the way how usb1 compatibility was implemented when ehci was
> introduced ...
> 
> Each usb 2.0 port is linked to both uhci and ehci controller.  ehci
> controls the port routing (i.e. whether a device shows up @ uhci or
> ehci).  There is one global bit and one per-port bit which control the
> routing.

Is "4.2.1 Port Routing Control via EHCI Configured (CF) Bit" related?
The Configured Flag (CF) is the global one apparently, and the Port
Owner bit is per port.

In "MdeModulePkg/Bus/Pci/EhciDxe/", the CF flag seems to be called
CONFIGFLAG_ROUTE_EHC, and Port Owner is PORTSC_OWNER.

> The global bit says whether the ports should be routed to ehci by
> default or not.  EHCI driver software is supposed to flip that bit at
> initialization time to get all devices routed to ehci.  Power-on default
> is route everything to uhci, so if the OS knows nothing about ehci all
> usb devices will show up on the uhci companion.
> 
> The per-port bit should be flipped by the ehci driver for usb1 devices,
> so they are handed over to uhci as ehci supports usb2 devices only.
> 
> 
> In qemu this is implemented by having ehci managing the usb bus and uhci
> registering its ports as companion ports.  ehci emulation will either
> handle the devices itself or call uhci, depending on how the guest has
> configured the routing.
> 
> There is even a basic test case for that:  tests/usb-hcd-ehci-test.c
> 
> Because ehci manages the bus the usb devices show up on ehci in 'info
> qtree', no matter how the port routing is configured.
> 
>> Not sure how I'd force the keyboard and mouse onto the default uhci1 and/or
>> uhci2 (from the qtree perspective), since they're given empty IDs by default
>> (and my qemu command line kung-fu is weak in this area).
> 
> Pure uhci working fine hints it most likely is a bug somewhere in the
> port routing code.  Could be in ovmf, but could be qemu too.  You can't
> force it from outside, port routing is guest business.
> 
> What happens if you build ovmf with uhci but without ehci driver?  Does
> that work?

I'm glad I happened to suggest the same. ;)

Laszlo

[Qemu-devel] [PATCH 03/23] block: Connect BlockBackend to BlockDriverState

2014-09-10 Thread Markus Armbruster

The pointer from BlockBackend to BlockDriverState is a strong
reference, managed with bdrv_ref() / bdrv_unref(), the back-pointer is
a weak one.

Convenience function blk_new_with_bs() creates a BlockBackend with its
BlockDriverState.  Callers have to unref both.  The commit after next
will relieve them of the need to unref the BlockDriverState.

Signed-off-by: Markus Armbruster 
---
 block.c|  10 ++--
 block/block-backend.c  |  79 +++
 blockdev.c |  49 +++-
 hw/block/xen_disk.c|   8 +---
 include/block/block_int.h  |   2 +
 include/sysemu/block-backend.h |   5 ++
 qemu-img.c | 103 +
 qemu-io.c  |   4 +-
 qemu-nbd.c |   3 +-
 9 files changed, 175 insertions(+), 88 deletions(-)

diff --git a/block.c b/block.c
index 4b3bcd4..a6c03da 100644
--- a/block.c
+++ b/block.c
@@ -2032,7 +2032,7 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
  * This will modify the BlockDriverState fields, and swap contents
  * between bs_new and bs_old. Both bs_new and bs_old are modified.
  *
- * bs_new is required to be anonymous.
+ * bs_new must be nameless and not attached to a BlockBackend.
  *
  * This function does not create any image files.
  */
@@ -2051,8 +2051,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
 }
 
-/* bs_new must be anonymous and shouldn't have anything fancy enabled */
+/* bs_new must be nameless and shouldn't have anything fancy enabled */
 assert(bs_new->device_name[0] == '\0');
+assert(!bs_new->blk);
 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
 assert(bs_new->job == NULL);
 assert(bs_new->dev == NULL);
@@ -2068,8 +2069,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 bdrv_move_feature_fields(bs_old, bs_new);
 bdrv_move_feature_fields(bs_new, &tmp);
 
-/* bs_new shouldn't be in bdrv_states even after the swap!  */
+/* bs_new must remain nameless and unattached */
 assert(bs_new->device_name[0] == '\0');
+assert(!bs_new->blk);
 
 /* Check a few fields that should remain attached to the device */
 assert(bs_new->dev == NULL);
@@ -2096,7 +2098,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
  * This will modify the BlockDriverState fields, and swap contents
  * between bs_new and bs_top. Both bs_new and bs_top are modified.
  *
- * bs_new is required to be anonymous.
+ * bs_new must be nameless and not attached to a BlockBackend.
  *
  * This function does not create any image files.
  */
diff --git a/block/block-backend.c b/block/block-backend.c
index 833f7d9..deccb54 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -16,6 +16,7 @@
 struct BlockBackend {
 char *name;
 int refcnt;
+BlockDriverState *bs;
 QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
 };
 
@@ -47,9 +48,43 @@ BlockBackend *blk_new(const char *name, Error **errp)
 return blk;
 }
 
+/**
+ * blk_new_with_bs:
+ * @name: name, must not be %NULL or empty
+ * @errp: return location for an error to be set on failure, or %NULL
+ *
+ * Create a new BlockBackend, with a reference count of one, and
+ * attach a new BlockDriverState to it, also with a reference count of
+ * one.  Caller owns *both* references.
+ * TODO Let caller own only the BlockBackend reference
+ * Fail if @name already exists.
+ *
+ * Returns: the BlockBackend on success, %NULL on error
+ */
+BlockBackend *blk_new_with_bs(const char *name, Error **errp)
+{
+BlockBackend *blk;
+BlockDriverState *bs;
+
+blk = blk_new(name, errp);
+if (!blk) {
+return NULL;
+}
+
+bs = bdrv_new_named(name, errp);
+if (!bs) {
+blk_unref(blk);
+return NULL;
+}
+
+blk_attach_bs(blk, bs);
+return blk;
+}
+
 static void blk_delete(BlockBackend *blk)
 {
 assert(!blk->refcnt);
+blk_detach_bs(blk);
 QTAILQ_REMOVE(&blk_backends, blk, link);
 g_free(blk->name);
 g_free(blk);
@@ -70,6 +105,9 @@ void blk_ref(BlockBackend *blk)
  *
  * Decrement @blk's reference count.  If this drops it to zero,
  * destroy @blk.
+ *
+ * Does *not* touch the attached BlockDriverState's reference count.
+ * TODO Decrement it!
  */
 void blk_unref(BlockBackend *blk)
 {
@@ -108,3 +146,44 @@ BlockBackend *blk_next(BlockBackend *blk)
 {
 return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
 }
+
+/**
+ * blk_attach_bs:
+ *
+ * Attach @bs to @blk, taking over the caller's reference to @bs.
+ */
+void blk_attach_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+assert(!blk->bs && !bs->blk);
+blk->bs = bs;
+bs->blk = blk;
+}
+
+/**
+ * blk_bs:
+ *
+ * Returns: the BlockDriverState attached to @blk, or %NULL
+ */
+BlockDriverState *blk_bs(BlockBackend *blk)
+{

[Qemu-devel] [PATCH 01/23] block: Split bdrv_new_named() off bdrv_new()

2014-09-10 Thread Markus Armbruster

Creating an anonymous BDS can't fail.  Make that obvious.

Signed-off-by: Markus Armbruster 
---
 block.c   | 26 +++---
 block/iscsi.c |  2 +-
 block/vvfat.c |  2 +-
 blockdev.c|  2 +-
 hw/block/xen_disk.c   |  2 +-
 include/block/block.h |  3 ++-
 qemu-img.c|  6 +++---
 qemu-io.c |  2 +-
 qemu-nbd.c|  2 +-
 9 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/block.c b/block.c
index d06dd51..4b3bcd4 100644
--- a/block.c
+++ b/block.c
@@ -335,10 +335,11 @@ void bdrv_register(BlockDriver *bdrv)
 }
 
 /* create a new block device (by default it is empty) */
-BlockDriverState *bdrv_new(const char *device_name, Error **errp)
+BlockDriverState *bdrv_new_named(const char *device_name, Error **errp)
 {
 BlockDriverState *bs;
-int i;
+
+assert(*device_name);
 
 if (bdrv_find(device_name)) {
 error_setg(errp, "Device with id '%s' already exists",
@@ -351,12 +352,23 @@ BlockDriverState *bdrv_new(const char *device_name, Error 
**errp)
 return NULL;
 }
 
-bs = g_new0(BlockDriverState, 1);
-QLIST_INIT(&bs->dirty_bitmaps);
+bs = bdrv_new();
+
 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
 if (device_name[0] != '\0') {
 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
 }
+
+return bs;
+}
+
+BlockDriverState *bdrv_new(void)
+{
+BlockDriverState *bs;
+int i;
+
+bs = g_new0(BlockDriverState, 1);
+QLIST_INIT(&bs->dirty_bitmaps);
 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
 QLIST_INIT(&bs->op_blockers[i]);
 }
@@ -1217,7 +1229,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict 
*options, Error **errp)
 goto free_exit;
 }
 
-backing_hd = bdrv_new("", errp);
+backing_hd = bdrv_new();
 
 if (bs->backing_format[0] != '\0') {
 back_drv = bdrv_find_format(bs->backing_format);
@@ -1346,7 +1358,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int 
flags, Error **errp)
 qdict_put(snapshot_options, "file.filename",
   qstring_from_str(tmp_filename));
 
-bs_snapshot = bdrv_new("", &error_abort);
+bs_snapshot = bdrv_new();
 
 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
 flags, bdrv_qcow2, &local_err);
@@ -1417,7 +1429,7 @@ int bdrv_open(BlockDriverState **pbs, const char 
*filename,
 if (*pbs) {
 bs = *pbs;
 } else {
-bs = bdrv_new("", &error_abort);
+bs = bdrv_new();
 }
 
 /* NULL means an empty set of options */
diff --git a/block/iscsi.c b/block/iscsi.c
index 3e19202..af3d0f6 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1528,7 +1528,7 @@ static int iscsi_create(const char *filename, QemuOpts 
*opts, Error **errp)
 IscsiLun *iscsilun = NULL;
 QDict *bs_options;
 
-bs = bdrv_new("", &error_abort);
+bs = bdrv_new();
 
 /* Read out options */
 total_size =
diff --git a/block/vvfat.c b/block/vvfat.c
index 731e591..6c9fde0 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2939,7 +2939,7 @@ static int enable_write_target(BDRVVVFATState *s, Error 
**errp)
 unlink(s->qcow_filename);
 #endif
 
-bdrv_set_backing_hd(s->bs, bdrv_new("", &error_abort));
+bdrv_set_backing_hd(s->bs, bdrv_new());
 s->bs->backing_hd->drv = &vvfat_write_target;
 s->bs->backing_hd->opaque = g_new(void *, 1);
 *(void**)s->bs->backing_hd->opaque = s;
diff --git a/blockdev.c b/blockdev.c
index e919566..9fbd888 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -458,7 +458,7 @@ static DriveInfo *blockdev_init(const char *file, QDict 
*bs_opts,
 /* init */
 dinfo = g_malloc0(sizeof(*dinfo));
 dinfo->id = g_strdup(qemu_opts_id(opts));
-dinfo->bdrv = bdrv_new(dinfo->id, &error);
+dinfo->bdrv = bdrv_new_named(dinfo->id, &error);
 if (error) {
 error_propagate(errp, error);
 goto bdrv_new_err;
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 2dcef07..8bac7ff 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -856,7 +856,7 @@ static int blk_connect(struct XenDevice *xendev)
 
 /* setup via xenbus -> create new block driver instance */
 xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
-blkdev->bs = bdrv_new(blkdev->dev, NULL);
+blkdev->bs = bdrv_new_named(blkdev->dev, NULL);
 if (!blkdev->bs) {
 return -1;
 }
diff --git a/include/block/block.h b/include/block/block.h
index 8f4ad16..95139c0 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -203,7 +203,8 @@ BlockDriver *bdrv_find_whitelisted_format(const char 
*format_name,
 int bdrv_create(BlockDriver *drv, const char* filename,
 QemuOpts *opts, Error **errp);
 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
-BlockDriverState *bdrv_new(const char *device_name, Error **errp);
+BlockDriverState *bdrv_new_named(

[Qemu-devel] [PATCH 10/23] block: Eliminate DriveInfo member bdrv, use blk_by_legacy_dinfo()

2014-09-10 Thread Markus Armbruster

Signed-off-by: Markus Armbruster 
---
 blockdev.c   |  3 +--
 hw/arm/collie.c  |  9 +
 hw/arm/gumstix.c |  5 +++--
 hw/arm/mainstone.c   |  8 
 hw/arm/musicpal.c| 11 ++-
 hw/arm/nseries.c |  6 --
 hw/arm/omap1.c   |  4 +++-
 hw/arm/omap2.c   |  4 +++-
 hw/arm/omap_sx1.c|  9 +
 hw/arm/pxa2xx.c  |  7 +--
 hw/arm/spitz.c   |  4 +++-
 hw/arm/versatilepb.c |  4 +++-
 hw/arm/vexpress.c|  4 +++-
 hw/arm/xilinx_zynq.c |  4 +++-
 hw/arm/z2.c  |  7 ---
 hw/block/fdc.c   | 16 +++-
 hw/block/m25p80.c|  5 +++--
 hw/block/xen_disk.c  |  2 +-
 hw/cris/axis_dev88.c |  3 ++-
 hw/display/tc6393xb.c|  4 +++-
 hw/i386/pc_sysfw.c   |  3 ++-
 hw/ide/piix.c|  6 --
 hw/ide/qdev.c|  4 +++-
 hw/isa/pc87312.c |  7 +--
 hw/lm32/lm32_boards.c| 13 +++--
 hw/lm32/milkymist.c  |  7 ---
 hw/microblaze/petalogix_ml605_mmu.c  |  5 +++--
 hw/microblaze/petalogix_s3adsp1800_mmu.c |  5 +++--
 hw/mips/mips_malta.c |  4 +++-
 hw/mips/mips_r4k.c   |  5 +++--
 hw/pci/pci-hotplug-old.c |  9 ++---
 hw/ppc/ppc405_boards.c   | 25 -
 hw/ppc/spapr.c   |  4 +++-
 hw/ppc/virtex_ml507.c|  5 +++--
 hw/scsi/scsi-bus.c   |  5 +++--
 hw/sd/milkymist-memcard.c|  7 +--
 hw/sd/pl181.c|  3 ++-
 hw/sd/sdhci.c|  3 ++-
 hw/sd/ssi-sd.c   |  3 ++-
 hw/sh4/r2d.c |  5 +++--
 hw/usb/dev-storage.c |  4 +++-
 hw/xtensa/xtfpga.c   |  4 +++-
 include/sysemu/blockdev.h|  1 -
 43 files changed, 163 insertions(+), 93 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 353563e..5c75abd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -219,7 +219,7 @@ static void bdrv_format_print(void *opaque, const char 
*name)
 
 void drive_del(DriveInfo *dinfo)
 {
-blk_unref(dinfo->bdrv->blk);
+blk_unref(blk_by_legacy_dinfo(dinfo));
 }
 
 typedef struct {
@@ -472,7 +472,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 
 dinfo = g_malloc0(sizeof(*dinfo));
 dinfo->id = g_strdup(qemu_opts_id(opts));
-dinfo->bdrv = bs;
 blk_set_legacy_dinfo(blk, dinfo);
 
 if (!file || !*file) {
diff --git a/hw/arm/collie.c b/hw/arm/collie.c
index ed7851f..0247290 100644
--- a/hw/arm/collie.c
+++ b/hw/arm/collie.c
@@ -15,6 +15,7 @@
 #include "strongarm.h"
 #include "hw/arm/arm.h"
 #include "hw/block/flash.h"
+#include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "exec/address-spaces.h"
 
@@ -41,13 +42,13 @@ static void collie_init(MachineState *machine)
 
 dinfo = drive_get(IF_PFLASH, 0, 0);
 pflash_cfi01_register(SA_CS0, NULL, "collie.fl1", 0x0200,
-dinfo ? dinfo->bdrv : NULL, (64 * 1024),
-512, 4, 0x00, 0x00, 0x00, 0x00, 0);
+dinfo ? blk_bs(blk_by_legacy_dinfo(dinfo)) : NULL,
+(64 * 1024), 512, 4, 0x00, 0x00, 0x00, 0x00, 0);
 
 dinfo = drive_get(IF_PFLASH, 0, 1);
 pflash_cfi01_register(SA_CS1, NULL, "collie.fl2", 0x0200,
-dinfo ? dinfo->bdrv : NULL, (64 * 1024),
-512, 4, 0x00, 0x00, 0x00, 0x00, 0);
+dinfo ? blk_bs(blk_by_legacy_dinfo(dinfo)) : NULL,
+(64 * 1024), 512, 4, 0x00, 0x00, 0x00, 0x00, 0);
 
 sysbus_create_simple("scoop", 0x4080, NULL);
 
diff --git a/hw/arm/gumstix.c b/hw/arm/gumstix.c
index 3f8465e..49f9339 100644
--- a/hw/arm/gumstix.c
+++ b/hw/arm/gumstix.c
@@ -40,6 +40,7 @@
 #include "hw/block/flash.h"
 #include "hw/devices.h"
 #include "hw/boards.h"
+#include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
 #include "exec/address-spaces.h"
 #include "sysemu/qtest.h"
@@ -71,7 +72,7 @@ static void connex_init(MachineState *machine)
 be = 0;
 #endif
 if (!pflash_cfi01_register(0x, NULL, "connext.rom", connex_rom,
-   dinfo ? dinfo->bdrv : NULL,
+   dinfo ? blk_bs(blk_by_legacy_dinfo(dinfo)) : 
NULL,
sector_len, connex_rom / sector_len,
2, 0, 0, 0, 0, be)) {
 fprintf(stderr, "qem

[Qemu-devel] [PATCH 09/23] block: Merge BlockBackend and BlockDriverState name spaces

2014-09-10 Thread Markus Armbruster

BlockBackend's name space is separate only to keep the patches simple.
Time to merge the two.

Signed-off-by: Markus Armbruster 
---
 block.c   | 11 +++
 block/block-backend.c | 10 +-
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/block.c b/block.c
index 61ea15d..34c8f8c 100644
--- a/block.c
+++ b/block.c
@@ -850,7 +850,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
 }
 
 /* takes care of avoiding namespaces collisions */
-if (bdrv_find(node_name)) {
+if (blk_by_name(node_name)) {
 error_setg(errp, "node-name=%s is conflicting with a device id",
node_name);
 return;
@@ -3765,14 +3765,9 @@ void bdrv_iterate_format(void (*it)(void *opaque, const 
char *name),
 /* This function is to find block backend bs */
 BlockDriverState *bdrv_find(const char *name)
 {
-BlockDriverState *bs;
+BlockBackend *blk = blk_by_name(name);
 
-for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
-if (!strcmp(name, bdrv_get_device_name(bs))) {
-return bs;
-}
-}
-return NULL;
+return blk ? blk_bs(blk) : NULL;
 }
 
 /* This function is to find a node in the bs graph */
diff --git a/block/block-backend.c b/block/block-backend.c
index 2f10d6a..b2db97b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -46,6 +46,11 @@ BlockBackend *blk_new(const char *name, Error **errp)
 error_setg(errp, "Device with id '%s' already exists", name);
 return NULL;
 }
+if (bdrv_find_node(name)) {
+error_setg(errp, "Device with node-name '%s' already exists", name);
+return NULL;
+}
+
 blk->name = g_strdup(name);
 blk->refcnt = 1;
 QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
@@ -67,11 +72,6 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp)
 BlockBackend *blk;
 BlockDriverState *bs;
 
-if (bdrv_find_node(name)) {
-error_setg(errp, "Device with node-name '%s' already exists", name);
-return NULL;
-}
-
 blk = blk_new(name, errp);
 if (!blk) {
 return NULL;
-- 
1.9.3

[Qemu-devel] [PATCH 06/23] block: Eliminate bdrv_states, use block_next() instead

2014-09-10 Thread Markus Armbruster

Signed-off-by: Markus Armbruster 
---
 block.c   | 43 +++
 block/block-backend.c |  4 
 include/block/block_int.h |  2 --
 3 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/block.c b/block.c
index a6c03da..89f9cf0 100644
--- a/block.c
+++ b/block.c
@@ -24,6 +24,7 @@
 #include "config-host.h"
 #include "qemu-common.h"
 #include "trace.h"
+#include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
 #include "qemu/module.h"
@@ -90,9 +91,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
 
-static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
-QTAILQ_HEAD_INITIALIZER(bdrv_states);
-
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
 
@@ -355,9 +353,6 @@ BlockDriverState *bdrv_new_named(const char *device_name, 
Error **errp)
 bs = bdrv_new();
 
 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
-if (device_name[0] != '\0') {
-QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
-}
 
 return bs;
 }
@@ -1888,7 +1883,7 @@ void bdrv_close_all(void)
 {
 BlockDriverState *bs;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 
 aio_context_acquire(aio_context);
@@ -1939,7 +1934,7 @@ void bdrv_drain_all(void)
 while (busy) {
 busy = false;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 bool bs_busy;
 
@@ -1960,9 +1955,6 @@ void bdrv_drain_all(void)
Also, NULL terminate the device_name to prevent double remove */
 void bdrv_make_anon(BlockDriverState *bs)
 {
-if (bs->device_name[0] != '\0') {
-QTAILQ_REMOVE(&bdrv_states, bs, device_list);
-}
 bs->device_name[0] = '\0';
 if (bs->node_name[0] != '\0') {
 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
@@ -2016,10 +2008,9 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 /* job */
 bs_dest->job= bs_src->job;
 
-/* keep the same entry in bdrv_states */
 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
 bs_src->device_name);
-bs_dest->device_list = bs_src->device_list;
+
 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
sizeof(bs_dest->op_blockers));
 }
@@ -2363,7 +2354,7 @@ int bdrv_commit_all(void)
 {
 BlockDriverState *bs;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 
 aio_context_acquire(aio_context);
@@ -3807,7 +3798,7 @@ BlockDriverState *bdrv_find(const char *name)
 {
 BlockDriverState *bs;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 if (!strcmp(name, bs->device_name)) {
 return bs;
 }
@@ -3888,17 +3879,21 @@ bool bdrv_chain_contains(BlockDriverState *top, 
BlockDriverState *base)
 
 BlockDriverState *bdrv_next(BlockDriverState *bs)
 {
-if (!bs) {
-return QTAILQ_FIRST(&bdrv_states);
-}
-return QTAILQ_NEXT(bs, device_list);
+BlockBackend *blk;
+
+for (blk = blk_next(bs ? bs->blk : NULL);
+ blk && !blk_bs(blk);
+ blk = blk_next(blk))
+;
+
+return blk ? blk_bs(blk) : NULL;
 }
 
 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
 {
 BlockDriverState *bs;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 it(opaque, bs);
 }
 }
@@ -3918,7 +3913,7 @@ int bdrv_flush_all(void)
 BlockDriverState *bs;
 int result = 0;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 int ret;
 
@@ -5065,7 +5060,7 @@ void bdrv_invalidate_cache_all(Error **errp)
 BlockDriverState *bs;
 Error *local_err = NULL;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 
 aio_context_acquire(aio_context);
@@ -5082,7 +5077,7 @@ void bdrv_clear_incoming_migration_all(void)
 {
 BlockDriverState *bs;
 
-QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 
 aio_context_acquire(aio_context);
@@ -5918,7 +5913,7 @@ bool bdrv_is_

[Qemu-devel] [PATCH 07/23] block: Eliminate bdrv_iterate(), use bdrv_next()

2014-09-10 Thread Markus Armbruster

Signed-off-by: Markus Armbruster 
---
 block-migration.c | 30 +++---
 block.c   |  9 -
 blockdev.c| 31 +--
 include/block/block.h |  2 --
 monitor.c | 33 +
 5 files changed, 37 insertions(+), 68 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 3ad31a2..cb3e16c 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -343,12 +343,25 @@ static void unset_dirty_tracking(void)
 }
 }
 
-static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
+static void init_blk_migration(QEMUFile *f)
 {
+BlockDriverState *bs;
 BlkMigDevState *bmds;
 int64_t sectors;
 
-if (!bdrv_is_read_only(bs)) {
+block_mig_state.submitted = 0;
+block_mig_state.read_done = 0;
+block_mig_state.transferred = 0;
+block_mig_state.total_sector_sum = 0;
+block_mig_state.prev_progress = -1;
+block_mig_state.bulk_completed = 0;
+block_mig_state.zero_blocks = migrate_zero_blocks();
+
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
+if (bdrv_is_read_only(bs)) {
+continue;
+}
+
 sectors = bdrv_nb_sectors(bs);
 if (sectors <= 0) {
 return;
@@ -378,19 +391,6 @@ static void init_blk_migration_it(void *opaque, 
BlockDriverState *bs)
 }
 }
 
-static void init_blk_migration(QEMUFile *f)
-{
-block_mig_state.submitted = 0;
-block_mig_state.read_done = 0;
-block_mig_state.transferred = 0;
-block_mig_state.total_sector_sum = 0;
-block_mig_state.prev_progress = -1;
-block_mig_state.bulk_completed = 0;
-block_mig_state.zero_blocks = migrate_zero_blocks();
-
-bdrv_iterate(init_blk_migration_it, NULL);
-}
-
 /* Called with no lock taken.  */
 
 static int blk_mig_save_bulked_block(QEMUFile *f)
diff --git a/block.c b/block.c
index 89f9cf0..593d89b 100644
--- a/block.c
+++ b/block.c
@@ -3889,15 +3889,6 @@ BlockDriverState *bdrv_next(BlockDriverState *bs)
 return blk ? blk_bs(blk) : NULL;
 }
 
-void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
-{
-BlockDriverState *bs;
-
-for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
-it(opaque, bs);
-}
-}
-
 const char *bdrv_get_device_name(BlockDriverState *bs)
 {
 return bs->device_name;
diff --git a/blockdev.c b/blockdev.c
index 791f6d9..353563e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2520,26 +2520,21 @@ fail:
 qmp_output_visitor_cleanup(ov);
 }
 
-static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
-{
-BlockJobInfoList **prev = opaque;
-BlockJob *job = bs->job;
-
-if (job) {
-BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
-elem->value = block_job_query(bs->job);
-(*prev)->next = elem;
-*prev = elem;
-}
-}
-
 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
-/* Dummy is a fake list element for holding the head pointer */
-BlockJobInfoList dummy = {};
-BlockJobInfoList *prev = &dummy;
-bdrv_iterate(do_qmp_query_block_jobs_one, &prev);
-return dummy.next;
+BlockJobInfoList *head = NULL, **p_next = &head;
+BlockDriverState *bs;
+
+for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
+if (bs->job) {
+BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+elem->value = block_job_query(bs->job);
+*p_next = elem;
+p_next = &elem->next;
+}
+}
+
+return head;
 }
 
 QemuOptsList qemu_common_drive_opts = {
diff --git a/include/block/block.h b/include/block/block.h
index 95139c0..8cf9ea3 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -409,8 +409,6 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
  Error **errp);
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
 BlockDriverState *bdrv_next(BlockDriverState *bs);
-void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
-  void *opaque);
 int bdrv_is_encrypted(BlockDriverState *bs);
 int bdrv_key_required(BlockDriverState *bs);
 int bdrv_set_key(BlockDriverState *bs, const char *key);
diff --git a/monitor.c b/monitor.c
index 34cee74..4ae66df 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4208,24 +4208,6 @@ static void file_completion(Monitor *mon, const char 
*input)
 closedir(ffs);
 }
 
-typedef struct MonitorBlockComplete {
-Monitor *mon;
-const char *input;
-} MonitorBlockComplete;
-
-static void block_completion_it(void *opaque, BlockDriverState *bs)
-{
-const char *name = bdrv_get_device_name(bs);
-MonitorBlockComplete *mbc = opaque;
-Monitor *mon = mbc->mon;
-const char *input = mbc->input;
-
-if (input[0] == '\0' ||
-!strncmp(name, (char *)input, strlen(input))) {
-readline_add_completion(mon->rs, name);
-}
-}
-
 static const char *next_arg_type(const char *types

[Qemu-devel] [PATCH 23/23] block: Make device model's references to BlockBackend strong

2014-09-10 Thread Markus Armbruster

Doesn't make a difference just yet, but it's the right thing to do.

Signed-off-by: Markus Armbruster 
---
 block/block-backend.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 6c0c8f2..7ad4e44 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -253,6 +253,7 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
 if (blk->dev) {
 return -EBUSY;
 }
+blk_ref(blk);
 blk->dev = dev;
 bdrv_iostatus_reset(blk->bs);
 
@@ -273,9 +274,10 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
 /* TODO change to DeviceState *dev when all users are qdevified */
 {
 assert(blk->dev == dev);
-blk->dev = NULL;
 blk->dev_ops = NULL;
 blk->dev_opaque = NULL;
+blk->dev = NULL;
+blk_unref(blk);
 bdrv_set_guest_block_size(blk->bs, 512);
 qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
 }
-- 
1.9.3

[Qemu-devel] [PATCH 08/23] block: Eliminate BlockDriverState member device_name[]

2014-09-10 Thread Markus Armbruster

device_name[] can become non-empty only in bdrv_new_named() and
bdrv_move_feature_fields().  The latter is used only to undo damage
done by bdrv_swap().  The former is called only by blk_new_with_bs().
Therefore, when a BlockDriverState's device_name[] is non-empty, then
it's owned by a BlockBackend.

The converse is also true, because blk_attach_bs() is called only by
blk_new_with_bs() so far.

Furthermore, blk_new_with_bs() keeps the two names equal.

Therefore, device_name[] is redundant.  Eliminate it.

Signed-off-by: Markus Armbruster 
---
 block-migration.c | 12 +
 block.c   | 63 ++-
 block/block-backend.c | 12 -
 block/cow.c   |  2 +-
 block/mirror.c|  3 ++-
 block/qapi.c  |  6 ++---
 block/qcow.c  |  4 +--
 block/qcow2.c |  4 +--
 block/qed.c   |  2 +-
 block/quorum.c|  4 +--
 block/vdi.c   |  2 +-
 block/vhdx.c  |  2 +-
 block/vmdk.c  |  4 +--
 block/vpc.c   |  2 +-
 block/vvfat.c |  2 +-
 blockjob.c|  3 ++-
 include/block/block.h |  3 +--
 include/block/block_int.h |  2 --
 18 files changed, 53 insertions(+), 79 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index cb3e16c..da30e93 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -14,7 +14,9 @@
  */
 
 #include "qemu-common.h"
-#include "block/block_int.h"
+#include "block/block.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
 #include "hw/hw.h"
 #include "qemu/queue.h"
 #include "qemu/timer.h"
@@ -130,9 +132,9 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
  | flags);
 
 /* device name */
-len = strlen(blk->bmds->bs->device_name);
+len = strlen(bdrv_get_device_name(blk->bmds->bs));
 qemu_put_byte(f, len);
-qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
+qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
 
 /* if a block is zero we need to flush here since the network
  * bandwidth is now a lot higher than the storage device bandwidth.
@@ -382,9 +384,9 @@ static void init_blk_migration(QEMUFile *f)
 
 if (bmds->shared_base) {
 DPRINTF("Start migration for %s with shared base image\n",
-bs->device_name);
+bdrv_get_device_name(bs));
 } else {
-DPRINTF("Start full migration for %s\n", bs->device_name);
+DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
 }
 
 QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
diff --git a/block.c b/block.c
index 593d89b..61ea15d 100644
--- a/block.c
+++ b/block.c
@@ -332,31 +332,6 @@ void bdrv_register(BlockDriver *bdrv)
 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 }
 
-/* create a new block device (by default it is empty) */
-BlockDriverState *bdrv_new_named(const char *device_name, Error **errp)
-{
-BlockDriverState *bs;
-
-assert(*device_name);
-
-if (bdrv_find(device_name)) {
-error_setg(errp, "Device with id '%s' already exists",
-   device_name);
-return NULL;
-}
-if (bdrv_find_node(device_name)) {
-error_setg(errp, "Device with node-name '%s' already exists",
-   device_name);
-return NULL;
-}
-
-bs = bdrv_new();
-
-pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
-
-return bs;
-}
-
 BlockDriverState *bdrv_new(void)
 {
 BlockDriverState *bs;
@@ -1159,7 +1134,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd)
 } else if (backing_hd) {
 error_setg(&bs->backing_blocker,
"device is used as backing hd of '%s'",
-   bs->device_name);
+   bdrv_get_device_name(bs));
 }
 
 bs->backing_hd = backing_hd;
@@ -1533,7 +1508,7 @@ int bdrv_open(BlockDriverState **pbs, const char 
*filename,
 } else {
 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
"support the option '%s'", drv->format_name,
-   bs->device_name, entry->key);
+   bdrv_get_device_name(bs), entry->key);
 }
 
 ret = -EINVAL;
@@ -1740,7 +1715,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 
BlockReopenQueue *queue,
 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
 reopen_state->flags & BDRV_O_RDWR) {
 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
-  reopen_state->bs->device_name);
+  bdrv_get_device_name(reopen_state->bs));
 goto error;
 }
 
@@ -1767,7 +1742,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 
BlockReopenQueue *queue,
 /* It is currently mandatory to have a bdrv_reopen_prepare()

[Qemu-devel] [PATCH 00/23] Split BlockBackend off BDS with an axe

2014-09-10 Thread Markus Armbruster

My last attempt got bogged down because I tried to do a reasonably
complete job, and the complexity proved more than I could handle with
the limited amount of uninterrupted time available.  This time, I'm
cutting BlockBackend off with an axe, leaving most of the work for
later.

Done in this series already:

* Introduce a BlockBackend type, and lift up BlockDriverState's
  device_name, device_list, dev, dev_ops, dev_opaque.  Much more
  remains to be lifted.

* Make BlockBackend own the DriveInfo.

* Wean hw/ off BlockDriverState, with two small exceptions.

* Fix blockdev-add not to create a bogus IDE drive (0,0).

* Take a few baby steps towards use of BlockBackend in monitor command
  code where appropriate.

Coming soon, hopefully:

* QMP command blockdev-del

* blockdev-add accepts node-name without id at top level

* Lift up more stuff

* More monitor command code BlockBackend use

Depends on my [PATCH 0/4] Block-related miscellaneous cleanups.

I know the diffstat looks intimidating.  I tried very hard to split
the patches so that the bigger ones do just one simple thing, and
mostly mechanically.

Markus Armbruster (23):
  block: Split bdrv_new_named() off bdrv_new()
  block: New BlockBackend
  block: Connect BlockBackend to BlockDriverState
  block: Connect BlockBackend and DriveInfo
  block: Make BlockBackend own its BlockDriverState
  block: Eliminate bdrv_states, use block_next() instead
  block: Eliminate bdrv_iterate(), use bdrv_next()
  block: Eliminate BlockDriverState member device_name[]
  block: Merge BlockBackend and BlockDriverState name spaces
  block: Eliminate DriveInfo member bdrv, use blk_by_legacy_dinfo()
  block: Rename BlockDriverAIOCB* to BlockAIOCB*
  virtio-blk: Drop redundant VirtIOBlock member conf
  virtio-blk: Rename VirtIOBlkConf variables to conf
  hw: Convert from BlockDriverState to BlockBackend, mostly
  ide: Complete conversion from BlockDriverState to BlockBackend
  pc87312: Drop unused members of PC87312State
  blockdev: Drop superfluous DriveInfo member id
  blockdev: Fix blockdev-add not to create IDE drive (0,0)
  blockdev: Drop DriveInfo member enable_auto_del
  block/qapi: Convert qmp_query_block() to BlockBackend
  blockdev: Convert qmp_eject(), qmp_change_blockdev() to BlockBackend
  block: Lift device model API into BlockBackend
  block: Make device model's references to BlockBackend strong

 block-migration.c|  44 +--
 block.c  | 389 +++-
 block/Makefile.objs  |   2 +-
 block/archipelago.c  |  30 +-
 block/backup.c   |   2 +-
 block/blkdebug.c |  22 +-
 block/blkverify.c|  20 +-
 block/block-backend.c| 588 +++
 block/commit.c   |   2 +-
 block/cow.c  |   2 +-
 block/curl.c |   8 +-
 block/iscsi.c|  10 +-
 block/linux-aio.c|   8 +-
 block/mirror.c   |   9 +-
 block/qapi.c |  27 +-
 block/qcow.c |   4 +-
 block/qcow2.c|   4 +-
 block/qed-gencb.c|   4 +-
 block/qed-table.c|  10 +-
 block/qed.c  |  48 +--
 block/qed.h  |  12 +-
 block/quorum.c   |  42 +--
 block/raw-aio.h  |   8 +-
 block/raw-posix.c|  32 +-
 block/raw-win32.c|  16 +-
 block/raw_bsd.c  |   8 +-
 block/rbd.c  |  58 +--
 block/sheepdog.c |   4 +-
 block/stream.c   |   2 +-
 block/vdi.c  |   2 +-
 block/vhdx.c |   2 +-
 block/vmdk.c |   4 +-
 block/vpc.c  |   2 +-
 block/vvfat.c|   4 +-
 block/win32-aio.c|   8 +-
 blockdev.c   | 204 +--
 blockjob.c   |   7 +-
 dma-helpers.c|  69 ++--
 hw/arm/collie.c  |  10 +-
 hw/arm/gumstix.c |   6 +-
 hw/arm/highbank.c|   2 +-
 hw/arm/mainstone.c   |   8 +-
 hw/arm/musicpal.c|  13 +-
 hw/arm/nseries.c |   7 +-
 hw/arm/omap1.c   |   4 +-
 hw/arm/omap2.c   |   4 +-
 hw/arm/omap_sx1.c|  10 +-
 hw/arm/pxa2xx.c  |   7 +-
 hw/arm/realview.c|   2 +-
 hw/arm/spitz.c   |   6 +-
 hw

[Qemu-devel] [PATCH 21/23] blockdev: Convert qmp_eject(), qmp_change_blockdev() to BlockBackend

2014-09-10 Thread Markus Armbruster

Much more command code needs conversion.  I'm converting these now
because they're using bdrv_dev_* functions, which I'm about to lift
into BlockBackend.

Signed-off-by: Markus Armbruster 
---
 blockdev.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 317239c..6286c0e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1501,8 +1501,10 @@ exit:
 }
 
 
-static void eject_device(BlockDriverState *bs, int force, Error **errp)
+static void eject_device(BlockBackend *blk, int force, Error **errp)
 {
+BlockDriverState *bs = blk_bs(blk);
+
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
 return;
 }
@@ -1526,15 +1528,15 @@ static void eject_device(BlockDriverState *bs, int 
force, Error **errp)
 
 void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
 {
-BlockDriverState *bs;
+BlockBackend *blk;
 
-bs = bdrv_find(device);
-if (!bs) {
+blk = blk_by_name(device);
+if (!blk) {
 error_set(errp, QERR_DEVICE_NOT_FOUND, device);
 return;
 }
 
-eject_device(bs, force, errp);
+eject_device(blk, force, errp);
 }
 
 void qmp_block_passwd(bool has_device, const char *device,
@@ -1593,16 +1595,18 @@ static void qmp_bdrv_open_encrypted(BlockDriverState 
*bs, const char *filename,
 void qmp_change_blockdev(const char *device, const char *filename,
  const char *format, Error **errp)
 {
+BlockBackend *blk;
 BlockDriverState *bs;
 BlockDriver *drv = NULL;
 int bdrv_flags;
 Error *err = NULL;
 
-bs = bdrv_find(device);
-if (!bs) {
+blk = blk_by_name(device);
+if (!blk) {
 error_set(errp, QERR_DEVICE_NOT_FOUND, device);
 return;
 }
+bs = blk_bs(blk);
 
 if (format) {
 drv = bdrv_find_whitelisted_format(format, bs->read_only);
@@ -1612,7 +1616,7 @@ void qmp_change_blockdev(const char *device, const char 
*filename,
 }
 }
 
-eject_device(bs, 0, &err);
+eject_device(blk, 0, &err);
 if (err) {
 error_propagate(errp, err);
 return;
-- 
1.9.3

[Qemu-devel] [PATCH 04/23] block: Connect BlockBackend and DriveInfo

2014-09-10 Thread Markus Armbruster

Make the BlockBackend own the DriveInfo.  Change blockdev_init() to
return the BlockBackend instead of the DriveInfo.

Signed-off-by: Markus Armbruster 
---
 block/block-backend.c | 38 +++
 blockdev.c| 79 +--
 include/sysemu/blockdev.h |  4 +++
 3 files changed, 78 insertions(+), 43 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index deccb54..2a22660 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -12,14 +12,18 @@
 
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
+#include "sysemu/blockdev.h"
 
 struct BlockBackend {
 char *name;
 int refcnt;
 BlockDriverState *bs;
+DriveInfo *legacy_dinfo;
 QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
 };
 
+static void drive_info_del(DriveInfo *dinfo);
+
 static QTAILQ_HEAD(, BlockBackend) blk_backends =
 QTAILQ_HEAD_INITIALIZER(blk_backends);
 
@@ -87,6 +91,7 @@ static void blk_delete(BlockBackend *blk)
 blk_detach_bs(blk);
 QTAILQ_REMOVE(&blk_backends, blk, link);
 g_free(blk->name);
+drive_info_del(blk->legacy_dinfo);
 g_free(blk);
 }
 
@@ -119,6 +124,16 @@ void blk_unref(BlockBackend *blk)
 }
 }
 
+static void drive_info_del(DriveInfo *dinfo)
+{
+if (dinfo) {
+qemu_opts_del(dinfo->opts);
+g_free(dinfo->id);
+g_free(dinfo->serial);
+g_free(dinfo);
+}
+}
+
 const char *blk_name(BlockBackend *blk)
 {
 return blk->name;
@@ -187,3 +202,26 @@ BlockDriverState *blk_detach_bs(BlockBackend *blk)
 }
 return bs;
 }
+
+DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
+{
+return blk->legacy_dinfo;
+}
+
+DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
+{
+assert(!blk->legacy_dinfo);
+return blk->legacy_dinfo = dinfo;
+}
+
+BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
+{
+BlockBackend *blk;
+
+QTAILQ_FOREACH(blk, &blk_backends, link) {
+if (blk->legacy_dinfo == dinfo) {
+return blk;
+}
+}
+assert(0);
+}
diff --git a/blockdev.c b/blockdev.c
index 0a0b95e..73e2da9 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -47,8 +47,6 @@
 #include "trace.h"
 #include "sysemu/arch_init.h"
 
-static QTAILQ_HEAD(drivelist, DriveInfo) drives = 
QTAILQ_HEAD_INITIALIZER(drives);
-
 static const char *const if_name[IF_COUNT] = {
 [IF_NONE] = "none",
 [IF_IDE] = "ide",
@@ -89,7 +87,8 @@ static const int if_max_devs[IF_COUNT] = {
  */
 void blockdev_mark_auto_del(BlockDriverState *bs)
 {
-DriveInfo *dinfo = drive_get_by_blockdev(bs);
+BlockBackend *blk = bs->blk;
+DriveInfo *dinfo = blk_legacy_dinfo(blk);
 
 if (dinfo && !dinfo->enable_auto_del) {
 return;
@@ -105,7 +104,8 @@ void blockdev_mark_auto_del(BlockDriverState *bs)
 
 void blockdev_auto_del(BlockDriverState *bs)
 {
-DriveInfo *dinfo = drive_get_by_blockdev(bs);
+BlockBackend *blk = bs->blk;
+DriveInfo *dinfo = blk_legacy_dinfo(blk);
 
 if (dinfo && dinfo->auto_del) {
 drive_del(dinfo);
@@ -153,15 +153,15 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, 
const char *file,
 
 DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
 {
+BlockBackend *blk;
 DriveInfo *dinfo;
 
-/* seek interface, bus and unit */
-
-QTAILQ_FOREACH(dinfo, &drives, next) {
-if (dinfo->type == type &&
-   dinfo->bus == bus &&
-   dinfo->unit == unit)
+for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+dinfo = blk_legacy_dinfo(blk);
+if (dinfo && dinfo->type == type
+&& dinfo->bus == bus && dinfo->unit == unit) {
 return dinfo;
+}
 }
 
 return NULL;
@@ -177,13 +177,15 @@ DriveInfo *drive_get_by_index(BlockInterfaceType type, 
int index)
 int drive_get_max_bus(BlockInterfaceType type)
 {
 int max_bus;
+BlockBackend *blk;
 DriveInfo *dinfo;
 
 max_bus = -1;
-QTAILQ_FOREACH(dinfo, &drives, next) {
-if(dinfo->type == type &&
-   dinfo->bus > max_bus)
+for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+dinfo = blk_legacy_dinfo(blk);
+if (dinfo && dinfo->type == type && dinfo->bus > max_bus) {
 max_bus = dinfo->bus;
+}
 }
 return max_bus;
 }
@@ -200,11 +202,11 @@ DriveInfo *drive_get_next(BlockInterfaceType type)
 
 DriveInfo *drive_get_by_blockdev(BlockDriverState *bs)
 {
-DriveInfo *dinfo;
+BlockBackend *blk;
 
-QTAILQ_FOREACH(dinfo, &drives, next) {
-if (dinfo->bdrv == bs) {
-return dinfo;
+for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+if (blk_bs(blk) == bs) {
+return blk_legacy_dinfo(blk);
 }
 }
 return NULL;
@@ -217,16 +219,10 @@ static void bdrv_format_print(void *opaque, const char 
*name)
 
 void drive_del(DriveInfo *dinfo)
 {
-if (dinfo->opts) {
-qemu_opts_del(dinfo->opts);
-}
+

[Qemu-devel] [PATCH 20/23] block/qapi: Convert qmp_query_block() to BlockBackend

2014-09-10 Thread Markus Armbruster

Much more command code needs conversion.  I start with this one
because it's using bdrv_dev_* functions, which I'm about to lift into
BlockBackend.

While there, give bdrv_query_info() internal linkage.

Signed-off-by: Markus Armbruster 
---
 block/qapi.c | 15 ---
 include/block/qapi.h |  3 ---
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index cc8f711..02121b2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -28,6 +28,7 @@
 #include "qapi-visit.h"
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"
+#include "sysemu/block-backend.h"
 #ifdef __linux__
 #include 
 #include 
@@ -264,15 +265,15 @@ void bdrv_query_image_info(BlockDriverState *bs,
 }
 
 /* @p_info will be set only on success. */
-void bdrv_query_info(BlockDriverState *bs,
- BlockInfo **p_info,
- Error **errp)
+static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
+Error **errp)
 {
 BlockInfo *info = g_malloc0(sizeof(*info));
+BlockDriverState *bs = blk_bs(blk);
 BlockDriverState *bs0;
 ImageInfo **p_image_info;
 Error *local_err = NULL;
-info->device = g_strdup(bdrv_get_device_name(bs));
+info->device = g_strdup(blk_name(blk));
 info->type = g_strdup("unknown");
 info->locked = bdrv_dev_is_medium_locked(bs);
 info->removable = bdrv_dev_has_removable_media(bs);
@@ -359,12 +360,12 @@ static BlockStats *bdrv_query_stats(const 
BlockDriverState *bs)
 BlockInfoList *qmp_query_block(Error **errp)
 {
 BlockInfoList *head = NULL, **p_next = &head;
-BlockDriverState *bs = NULL;
+BlockBackend *blk;
 Error *local_err = NULL;
 
- while ((bs = bdrv_next(bs))) {
+for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 BlockInfoList *info = g_malloc0(sizeof(*info));
-bdrv_query_info(bs, &info->value, &local_err);
+bdrv_query_info(blk, &info->value, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto err;
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 0374546..168d788 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -36,9 +36,6 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
 void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp);
-void bdrv_query_info(BlockDriverState *bs,
- BlockInfo **p_info,
- Error **errp);
 
 void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
 QEMUSnapshotInfo *sn);
-- 
1.9.3

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Nikunj A Dadhania

Greg Kurz  writes:

> On Mon, 8 Sep 2014 19:05:02 +0300
> "Michael S. Tsirkin"  wrote:
>
>> commit cc943c36faa192cd4b32af8fe5edb31894017d35
>> pci: Use bus master address space for delivering MSI/MSI-X messages
>> breaks virtio-net for rhel6.[56] x86 guests because they don't
>> enable bus mastering for virtio PCI devices
>> 
>> Old guests forgot to enable bus mastering, enable it
>> automatically on DRIVER_OK.
>> 
>> Note: we should either back out the original patch from
>> stable or apply this one on top.
>> 
>> Cc: qemu-sta...@nongnu.org
>> Reported-by: Greg Kurz 
>> Signed-off-by: Jan Kiszka 
>> Signed-off-by: Michael S. Tsirkin 
>> ---
>>  hw/virtio/virtio-pci.c | 2 ++
>>  1 file changed, 2 insertions(+)
>> 
>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> index ddb5da1..af937d2 100644
>> --- a/hw/virtio/virtio-pci.c
>> +++ b/hw/virtio/virtio-pci.c
>> @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, uint32_t 
>> addr, uint32_t val)
>>  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
>>  !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
>>  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
>> +
>> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
>> +  true);
>>  }
>>  break;
>>  case VIRTIO_MSI_CONFIG_VECTOR:
>
> Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel expertise.
>
> Michael,
>
> This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
> Unfortunately,
> this fails for rhel6.5 ppc64 because it is never called... 

> I did some debugging: it looks like the guest kernel calls the OF
> quiesce call to flush pending DMA and disables bus master on the
> virtio-blk device (PCI_COMMAND == 0x3).

I'm getting confused: above you are talking about virtio-net, and here
it is virtio-blk.

Anyway, the routine remains the same for both of them.  SLOF sets
DRIVER_OK during init; then, after the device has been used, during the
quiesce (called from the Linux kernel), VIRTIO_CONFIG_S_FAILED is set
and then a VIRTIO_DEVICE_RESET is done.

> The guest then continues to boot and hangs... It appears that waiting
> for the guest to issue VIRTIO_CONFIG_S_DRIVER_OK is not enough. Since
> we need this for MSI to work, I tried the following and it fixes the
> issue:
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index af937d2..3d72aa8 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -111,9 +111,14 @@ static void virtio_pci_notify(DeviceState *d, uint16_t 
> vector)
>  {
>  VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
>  
> -if (msix_enabled(&proxy->pci_dev))
> +if (msix_enabled(&proxy->pci_dev)) {
> +if (!(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> +proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> +
> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> +  true);
> +}
>  msix_notify(&proxy->pci_dev, vector);
> -else {
> +} else {
>  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
>  pci_set_irq(&proxy->pci_dev, vdev->isr & 1);
>  }
>
> If this is acceptable, I'll make it a helper and squash it into your patch.
>
> Thoughts ?
>
> -- 
> Gregory Kurz kurzg...@fr.ibm.com
>  gk...@linux.vnet.ibm.com
> Software Engineer @ IBM/Meiosys  http://www.ibm.com
> Tel +33 (0)562 165 496
>
> "Anarchy is about taking complete responsibility for yourself."
> Alan Moore.

[Qemu-devel] [PATCH 02/23] block: New BlockBackend

2014-09-10 Thread Markus Armbruster

A block device consists of a frontend device model and a backend.

A block backend has a tree of block drivers doing the actual work.
The tree is managed by the block layer.

We currently use a single abstraction BlockDriverState both for tree
nodes and the backend as a whole.  Drawbacks:

* Its API includes both stuff that makes sense only at the block
  backend level (root of the tree) and stuff that's only for use
  within the block layer.  This makes the API bigger and more complex
  than necessary.  Moreover, it's not obvious which interfaces are
  meant for device models, and which really aren't.

* Since device models keep a reference to their backend, the backend
  object can't just be destroyed.  But for media change, we need to
  replace the tree.  Our solution is to make the BlockDriverState
  generic, with actual driver state in a separate object, pointed to
  by member opaque.  That lets us replace the tree by deinitializing
  and reinitializing its root.  This special need of the root makes
  the data structure awkward everywhere in the tree.

The general plan is to separate the APIs into "block backend", for use
by device models, monitor and whatever other code dealing with block
backends, and "block driver", for use by the block layer and whatever
other code (if any) dealing with trees and tree nodes.

Code dealing with block backends, device models in particular, should
become completely oblivious of BlockDriverState.  This should let us
clean up both APIs, and the tree data structures.

This commit is a first step.  It creates a minimal "block backend"
API: type BlockBackend and functions to create, destroy and find them.
BlockBackend objects are created and destroyed, but not yet used for
anything; that'll come shortly.

BlockBackend is reference-counted.  Its reference count never exceeds
one so far, but that's going to change.

Signed-off-by: Markus Armbruster 
---
 block/Makefile.objs|   2 +-
 block/block-backend.c  | 110 +
 blockdev.c |  10 +++-
 hw/block/xen_disk.c|  11 +
 include/qemu/typedefs.h|   1 +
 include/sysemu/block-backend.h |  26 ++
 qemu-img.c |  46 +
 qemu-io.c  |   8 +++
 qemu-nbd.c |   3 +-
 9 files changed, 214 insertions(+), 3 deletions(-)
 create mode 100644 block/block-backend.c
 create mode 100644 include/sysemu/block-backend.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index f45f939..a70140b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -5,7 +5,7 @@ block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-$(CONFIG_QUORUM) += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
-block-obj-y += snapshot.o qapi.o
+block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
diff --git a/block/block-backend.c b/block/block-backend.c
new file mode 100644
index 000..833f7d9
--- /dev/null
+++ b/block/block-backend.c
@@ -0,0 +1,110 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster ,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "sysemu/block-backend.h"
+#include "block/block_int.h"
+
+struct BlockBackend {
+char *name;
+int refcnt;
+QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
+};
+
+static QTAILQ_HEAD(, BlockBackend) blk_backends =
+QTAILQ_HEAD_INITIALIZER(blk_backends);
+
+/**
+ * blk_new:
+ * @name: name, must not be %NULL or empty
+ * @errp: return location for an error to be set on failure, or %NULL
+ *
+ * Create a new BlockBackend, with a reference count of one.  Fail if
+ * @name already exists.
+ *
+ * Returns: the BlockBackend on success, %NULL on failure
+ */
+BlockBackend *blk_new(const char *name, Error **errp)
+{
+BlockBackend *blk = g_new0(BlockBackend, 1);
+
+assert(name && name[0]);
+if (blk_by_name(name)) {
+error_setg(errp, "Device with id '%s' already exists", name);
+return NULL;
+}
+blk->name = g_strdup(name);
+blk->refcnt = 1;
+QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
+return blk;
+}
+
+static void blk_delete(BlockBackend *blk)
+{
+assert(!blk->refcnt);
+QTAILQ_REMOVE(&blk_backends, blk, link);
+g_free(blk->name);
+g_free(blk);
+}
+
+/**
+ * blk_ref:
+ *
+ * Increment @blk's reference count.
+ */
+void blk_ref(BlockBackend *blk)
+{
+blk->refcnt++;
+}
+
+/**
+ * blk_unref:
+ *
+ * Decrement @blk's reference count.  If this drops it to zero,
+ * destroy @blk.
+ */
+void blk_unref(BlockBackend *blk)
+{
+if (blk) {
+g_assert(blk->refcnt > 0);
+if (!--blk->refcnt) {
+

[Qemu-devel] [PATCH 12/23] virtio-blk: Drop redundant VirtIOBlock member conf

2014-09-10 Thread Markus Armbruster

Commit 12c5674 turned it into a pointer to member blk.conf.

Signed-off-by: Markus Armbruster 
---
 hw/block/virtio-blk.c  | 28 ++--
 include/hw/virtio/virtio-blk.h |  1 -
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index a7f2827..0be7203 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -297,7 +297,7 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
 if (sector & dev->sector_mask) {
 return false;
 }
-if (size % dev->conf->logical_block_size) {
+if (size % dev->blk.conf.logical_block_size) {
 return false;
 }
 bdrv_get_geometry(dev->bs, &total_sectors);
@@ -516,19 +516,20 @@ static void virtio_blk_reset(VirtIODevice *vdev)
 static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
+BlockConf *conf = &s->blk.conf;
 struct virtio_blk_config blkcfg;
 uint64_t capacity;
-int blk_size = s->conf->logical_block_size;
+int blk_size = conf->logical_block_size;
 
 bdrv_get_geometry(s->bs, &capacity);
 memset(&blkcfg, 0, sizeof(blkcfg));
 virtio_stq_p(vdev, &blkcfg.capacity, capacity);
 virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
-virtio_stw_p(vdev, &blkcfg.cylinders, s->conf->cyls);
+virtio_stw_p(vdev, &blkcfg.cylinders, conf->cyls);
 virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
-virtio_stw_p(vdev, &blkcfg.min_io_size, s->conf->min_io_size / blk_size);
-virtio_stw_p(vdev, &blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
-blkcfg.heads = s->conf->heads;
+virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
+virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
+blkcfg.heads = conf->heads;
 /*
  * We must ensure that the block device capacity is a multiple of
  * the logical block size. If that is not the case, let's use
@@ -540,13 +541,13 @@ static void virtio_blk_update_config(VirtIODevice *vdev, 
uint8_t *config)
  * divided by 512 - instead it is the amount of blk_size blocks
  * per track (cylinder).
  */
-if (bdrv_getlength(s->bs) /  s->conf->heads / s->conf->secs % blk_size) {
-blkcfg.sectors = s->conf->secs & ~s->sector_mask;
+if (bdrv_getlength(s->bs) /  conf->heads / conf->secs % blk_size) {
+blkcfg.sectors = conf->secs & ~s->sector_mask;
 } else {
-blkcfg.sectors = s->conf->secs;
+blkcfg.sectors = conf->secs;
 }
 blkcfg.size_max = 0;
-blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
+blkcfg.physical_block_exp = get_physical_block_exp(&s->blk.conf);
 blkcfg.alignment_offset = 0;
 blkcfg.wce = bdrv_enable_write_cache(s->bs);
 memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
@@ -753,9 +754,8 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 sizeof(struct virtio_blk_config));
 
 s->bs = blk->conf.bs;
-s->conf = &blk->conf;
 s->rq = NULL;
-s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
+s->sector_mask = (s->blk.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
 
 s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
 s->complete_request = virtio_blk_complete_request;
@@ -774,11 +774,11 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
 virtio_blk_save, virtio_blk_load, s);
 bdrv_set_dev_ops(s->bs, &virtio_block_ops, s);
-bdrv_set_guest_block_size(s->bs, s->conf->logical_block_size);
+bdrv_set_guest_block_size(s->bs, s->blk.conf.logical_block_size);
 
 bdrv_iostatus_enable(s->bs);
 
-add_boot_device_path(s->conf->bootindex, dev, "/disk@0,0");
+add_boot_device_path(s->blk.conf.bootindex, dev, "/disk@0,0");
 }
 
 static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index afb7b8d..1329482 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -124,7 +124,6 @@ typedef struct VirtIOBlock {
 VirtQueue *vq;
 void *rq;
 QEMUBH *bh;
-BlockConf *conf;
 VirtIOBlkConf blk;
 unsigned short sector_mask;
 bool original_wce;
-- 
1.9.3

[Qemu-devel] [PATCH 16/23] pc87312: Drop unused members of PC87312State

2014-09-10 Thread Markus Armbruster

Signed-off-by: Markus Armbruster 
---
 include/hw/isa/pc87312.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/hw/isa/pc87312.h b/include/hw/isa/pc87312.h
index befc8bd..bf74470 100644
--- a/include/hw/isa/pc87312.h
+++ b/include/hw/isa/pc87312.h
@@ -47,13 +47,10 @@ typedef struct PC87312State {
 
 struct {
 ISADevice *dev;
-BlockDriverState *drive[2];
-uint32_t base;
 } fdc;
 
 struct {
 ISADevice *dev;
-uint32_t base;
 } ide;
 
 MemoryRegion io;
-- 
1.9.3

[Qemu-devel] [PATCH 15/23] ide: Complete conversion from BlockDriverState to BlockBackend

2014-09-10 Thread Markus Armbruster

Add a BlockBackend member to TrimAIOCB, so ide_issue_trim_cb() can use
blk_aio_discard() instead of bdrv_aio_discard().

Signed-off-by: Markus Armbruster 
---
 hw/ide/core.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index fe12145..c043dbe 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -362,6 +362,7 @@ static void ide_set_signature(IDEState *s)
 
 typedef struct TrimAIOCB {
 BlockAIOCB common;
+BlockBackend *blk;
 QEMUBH *bh;
 int ret;
 QEMUIOVector *qiov;
@@ -423,8 +424,8 @@ static void ide_issue_trim_cb(void *opaque, int ret)
 }
 
 /* Got an entry! Submit and exit.  */
-iocb->aiocb = bdrv_aio_discard(iocb->common.bs, sector, count,
-   ide_issue_trim_cb, opaque);
+iocb->aiocb = blk_aio_discard(iocb->blk, sector, count,
+  ide_issue_trim_cb, opaque);
 return;
 }
 
@@ -448,6 +449,7 @@ BlockAIOCB *ide_issue_trim(BlockBackend *blk,
 TrimAIOCB *iocb;
 
 iocb = blk_aio_get(&trim_aiocb_info, blk, cb, opaque);
+iocb->blk = blk;
 iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
 iocb->ret = 0;
 iocb->qiov = qiov;
-- 
1.9.3

[Qemu-devel] [PATCH 05/23] block: Make BlockBackend own its BlockDriverState

2014-09-10 Thread Markus Armbruster

On BlockBackend destruction, unref its BlockDriverState.  Replaces the
callers' unrefs.

Signed-off-by: Markus Armbruster 
---
 block/block-backend.c |  9 ++---
 blockdev.c| 11 +++
 hw/block/xen_disk.c   |  6 +++---
 qemu-img.c| 35 +--
 qemu-io.c |  5 -
 5 files changed, 9 insertions(+), 57 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 2a22660..ae51f7f 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -58,10 +58,7 @@ BlockBackend *blk_new(const char *name, Error **errp)
  * @errp: return location for an error to be set on failure, or %NULL
  *
  * Create a new BlockBackend, with a reference count of one, and
- * attach a new BlockDriverState to it, also with a reference count of
- * one.  Caller owns *both* references.
- * TODO Let caller own only the BlockBackend reference
- * Fail if @name already exists.
+ * a new BlockDriverState attached.  Fail if @name already exists.
  *
  * Returns: the BlockBackend on success, %NULL on error
  */
@@ -88,6 +85,7 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp)
 static void blk_delete(BlockBackend *blk)
 {
 assert(!blk->refcnt);
+bdrv_unref(blk->bs);
 blk_detach_bs(blk);
 QTAILQ_REMOVE(&blk_backends, blk, link);
 g_free(blk->name);
@@ -110,9 +108,6 @@ void blk_ref(BlockBackend *blk)
  *
  * Decrement @blk's reference count.  If this drops it to zero,
  * destroy @blk.
- *
- * Does *not* touch the attached BlockDriverState's reference count.
- * TODO Decrement it!
  */
 void blk_unref(BlockBackend *blk)
 {
diff --git a/blockdev.c b/blockdev.c
index 73e2da9..791f6d9 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -108,7 +108,7 @@ void blockdev_auto_del(BlockDriverState *bs)
 DriveInfo *dinfo = blk_legacy_dinfo(blk);
 
 if (dinfo && dinfo->auto_del) {
-drive_del(dinfo);
+blk_unref(blk);
 }
 }
 
@@ -219,10 +219,7 @@ static void bdrv_format_print(void *opaque, const char 
*name)
 
 void drive_del(DriveInfo *dinfo)
 {
-BlockBackend *blk = dinfo->bdrv->blk;
-
-bdrv_unref(dinfo->bdrv);
-blk_unref(blk);
+blk_unref(dinfo->bdrv->blk);
 }
 
 typedef struct {
@@ -524,7 +521,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 return blk;
 
 err:
-bdrv_unref(dinfo->bdrv);
 blk_unref(blk);
 early_err:
 qemu_opts_del(opts);
@@ -1778,7 +1774,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
   BLOCKDEV_ON_ERROR_REPORT);
 } else {
-drive_del(drive_get_by_blockdev(bs));
+blk_unref(blk);
 }
 
 aio_context_release(aio_context);
@@ -2515,7 +2511,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error 
**errp)
 }
 
 if (bdrv_key_required(blk_bs(blk))) {
-bdrv_unref(blk_bs(blk));
 blk_unref(blk);
 error_setg(errp, "blockdev-add doesn't support encrypted devices");
 goto fail;
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 51f4f3a..6d474b9 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -870,7 +870,6 @@ static int blk_connect(struct XenDevice *xendev)
 xen_be_printf(&blkdev->xendev, 0, "error: %s\n",
   error_get_pretty(local_err));
 error_free(local_err);
-bdrv_unref(blkdev->bs);
 blk_unref(blk);
 blkdev->bs = NULL;
 return -1;
@@ -886,7 +885,9 @@ static int blk_connect(struct XenDevice *xendev)
 }
 /* blkdev->bs is not create by us, we get a reference
  * so we can bdrv_unref() unconditionally */
-bdrv_ref(blkdev->bs);
+/* Except we don't bdrv_unref() anymore, we blk_unref().
+ * Conditionally, because we can't easily blk_ref() here.
+ * TODO Clean this up! */
 }
 bdrv_attach_dev_nofail(blkdev->bs, blkdev);
 blkdev->file_size = bdrv_getlength(blkdev->bs);
@@ -986,7 +987,6 @@ static void blk_disconnect(struct XenDevice *xendev)
 
 if (blkdev->bs) {
 bdrv_detach_dev(blkdev->bs, blkdev);
-bdrv_unref(blkdev->bs);
 if (!blkdev->dinfo) {
 blk_unref(blk_by_name(blkdev->dev));
 }
diff --git a/qemu-img.c b/qemu-img.c
index 9fba5a1..083a8eb 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -329,7 +329,6 @@ static BlockBackend *img_open(const char *id, const char 
*filename,
 }
 return blk;
 fail:
-bdrv_unref(bs);
 blk_unref(blk);
 return NULL;
 }
@@ -712,9 +711,7 @@ static int img_check(int argc, char **argv)
 
 fail:
 qapi_free_ImageCheck(check);
-bdrv_unref(bs);
 blk_unref(blk);
-
 return ret;
 }
 
@@ -786,7 +783,6 @@ static int img_commit(int argc, char **argv)
 break;
 }
 
-bdrv_unref(bs);
 blk_unref(blk);
 if (ret) {
 return 1;
@@ -1194,12 +1190,10 @@ static int img_compare(int

[Qemu-devel] [PATCH 19/23] blockdev: Drop DriveInfo member enable_auto_del

2014-09-10 Thread Markus Armbruster

Commit 2d246f0 introduced DriveInfo member enable_auto_del to
distinguish DriveInfo created via drive_new() from DriveInfo created
via qmp_blockdev_add().  The latter no longer exists.  Drop
enable_auto_del.

Signed-off-by: Markus Armbruster 
---
 blockdev.c| 7 ++-
 include/sysemu/blockdev.h | 1 -
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 12906a6..317239c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -90,16 +90,14 @@ void blockdev_mark_auto_del(BlockBackend *blk)
 DriveInfo *dinfo = blk_legacy_dinfo(blk);
 BlockDriverState *bs = blk_bs(blk);
 
-if (dinfo && !dinfo->enable_auto_del) {
+if (!dinfo) {
 return;
 }
 
 if (bs->job) {
 block_job_cancel(bs->job);
 }
-if (dinfo) {
-dinfo->auto_del = 1;
-}
+dinfo->auto_del = 1;
 }
 
 void blockdev_auto_del(BlockBackend *blk)
@@ -899,7 +897,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType 
block_default_type)
 
 /* Set legacy DriveInfo fields */
 dinfo = g_malloc0(sizeof(*dinfo));
-dinfo->enable_auto_del = true;
 dinfo->opts = all_opts;
 dinfo->cyls = cyls;
 dinfo->heads = heads;
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 27a40d5..2129d81 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -35,7 +35,6 @@ struct DriveInfo {
 int bus;
 int unit;
 int auto_del;   /* see blockdev_mark_auto_del() */
-bool enable_auto_del;   /* Only for legacy drive_new() */
 int media_cd;
 int cyls, heads, secs, trans;
 QemuOpts *opts;
-- 
1.9.3

[Qemu-devel] [PATCH 13/23] virtio-blk: Rename VirtIOBlkConf variables to conf

2014-09-10 Thread Markus Armbruster

This is consistent with how VirtIOFOOConf variables are named
elsewhere, and makes blk available for BlockBackend variables.

Signed-off-by: Markus Armbruster 
---
 hw/block/dataplane/virtio-blk.c | 33 +-
 hw/block/dataplane/virtio-blk.h |  2 +-
 hw/block/virtio-blk.c   | 52 -
 include/hw/virtio/virtio-blk.h  |  2 +-
 4 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index b55188c..af67dc3 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -30,7 +30,7 @@ struct VirtIOBlockDataPlane {
 bool stopping;
 bool disabled;
 
-VirtIOBlkConf *blk;
+VirtIOBlkConf *conf;
 
 VirtIODevice *vdev;
 Vring vring;/* virtqueue vring */
@@ -94,7 +94,7 @@ static void handle_notify(EventNotifier *e)
 VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
 
 event_notifier_test_and_clear(&s->host_notifier);
-bdrv_io_plug(s->blk->conf.bs);
+bdrv_io_plug(s->conf->conf.bs);
 for (;;) {
 MultiReqBuffer mrb = {
 .num_writes = 0,
@@ -120,7 +120,7 @@ static void handle_notify(EventNotifier *e)
 virtio_blk_handle_request(req, &mrb);
 }
 
-virtio_submit_multiwrite(s->blk->conf.bs, &mrb);
+virtio_submit_multiwrite(s->conf->conf.bs, &mrb);
 
 if (likely(ret == -EAGAIN)) { /* vring emptied */
 /* Re-enable guest->host notifies and stop processing the vring.
@@ -133,11 +133,11 @@ static void handle_notify(EventNotifier *e)
 break;
 }
 }
-bdrv_io_unplug(s->blk->conf.bs);
+bdrv_io_unplug(s->conf->conf.bs);
 }
 
 /* Context: QEMU global mutex held */
-void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
+void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
   VirtIOBlockDataPlane **dataplane,
   Error **errp)
 {
@@ -148,7 +148,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *blk,
 
 *dataplane = NULL;
 
-if (!blk->data_plane && !blk->iothread) {
+if (!conf->data_plane && !conf->iothread) {
 return;
 }
 
@@ -163,7 +163,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *blk,
 /* If dataplane is (re-)enabled while the guest is running there could be
  * block jobs that can conflict.
  */
-if (bdrv_op_is_blocked(blk->conf.bs, BLOCK_OP_TYPE_DATAPLANE, &local_err)) 
{
+if (bdrv_op_is_blocked(conf->conf.bs, BLOCK_OP_TYPE_DATAPLANE,
+   &local_err)) {
 error_report("cannot start dataplane thread: %s",
   error_get_pretty(local_err));
 error_free(local_err);
@@ -172,10 +173,10 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *blk,
 
 s = g_new0(VirtIOBlockDataPlane, 1);
 s->vdev = vdev;
-s->blk = blk;
+s->conf = conf;
 
-if (blk->iothread) {
-s->iothread = blk->iothread;
+if (conf->iothread) {
+s->iothread = conf->iothread;
 object_ref(OBJECT(s->iothread));
 } else {
 /* Create per-device IOThread if none specified.  This is for
@@ -192,9 +193,9 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *blk,
 s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
 
 error_setg(&s->blocker, "block device is in use by data plane");
-bdrv_op_block_all(blk->conf.bs, s->blocker);
-bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_RESIZE, s->blocker);
-bdrv_op_unblock(blk->conf.bs, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
+bdrv_op_block_all(conf->conf.bs, s->blocker);
+bdrv_op_unblock(conf->conf.bs, BLOCK_OP_TYPE_RESIZE, s->blocker);
+bdrv_op_unblock(conf->conf.bs, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
 
 *dataplane = s;
 }
@@ -207,7 +208,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
 }
 
 virtio_blk_data_plane_stop(s);
-bdrv_op_unblock_all(s->blk->conf.bs, s->blocker);
+bdrv_op_unblock_all(s->conf->conf.bs, s->blocker);
 error_free(s->blocker);
 object_unref(OBJECT(s->iothread));
 qemu_bh_delete(s->bh);
@@ -262,7 +263,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
 s->started = true;
 trace_virtio_blk_data_plane_start(s);
 
-bdrv_set_aio_context(s->blk->conf.bs, s->ctx);
+bdrv_set_aio_context(s->conf->conf.bs, s->ctx);
 
 /* Kick right away to begin processing requests already in vring */
 event_notifier_set(virtio_queue_get_host_notifier(vq));
@@ -308,7 +309,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
 aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);
 
 /* Drain and switch bs back to the QEMU main loop */
-bdrv_set_aio_context(s->blk->conf.bs, qemu_get_aio_context());
+bdrv_set_aio_context(s->conf->conf.bs, qemu_get_aio_c

[Qemu-devel] [PATCH 17/23] blockdev: Drop superfluous DriveInfo member id

2014-09-10 Thread Markus Armbruster

Signed-off-by: Markus Armbruster 
---
 block/block-backend.c | 1 -
 blockdev.c| 3 +--
 include/sysemu/blockdev.h | 1 -
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index cc21f3c..1bac033 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -123,7 +123,6 @@ static void drive_info_del(DriveInfo *dinfo)
 {
 if (dinfo) {
 qemu_opts_del(dinfo->opts);
-g_free(dinfo->id);
 g_free(dinfo->serial);
 g_free(dinfo);
 }
diff --git a/blockdev.c b/blockdev.c
index 6f1e479..4488dfd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -458,7 +458,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 }
 
 dinfo = g_malloc0(sizeof(*dinfo));
-dinfo->id = g_strdup(qemu_opts_id(opts));
 blk_set_legacy_dinfo(blk, dinfo);
 
 if (!file || !*file) {
@@ -492,7 +491,7 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 
 if (ret < 0) {
 error_setg(errp, "could not open disk image %s: %s",
-   file ?: dinfo->id, error_get_pretty(error));
+   file ?: blk_name(blk), error_get_pretty(error));
 error_free(error);
 goto err;
 }
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index f66b89a..27a40d5 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -30,7 +30,6 @@ typedef enum {
 } BlockInterfaceType;
 
 struct DriveInfo {
-char *id;
 const char *devaddr;
 BlockInterfaceType type;
 int bus;
-- 
1.9.3

[Qemu-devel] [PATCH 18/23] blockdev: Fix blockdev-add not to create IDE drive (0, 0)

2014-09-10 Thread Markus Armbruster

blockdev_init() always creates a DriveInfo, but only drive_new() fills
it in.  qmp_blockdev_add() leaves it blank.  This results in a drive
with type = IF_IDE, bus = 0, unit = 0.  Screwed up in commit ee13ed1c.

Board initialization code looking for IDE drive (0,0) can pick up one
of these bogus drives.  Not sure whether getting the QMP command
executed early enough is likely in practice, though.

Fix by creating DriveInfo in drive_new().  Block backends created by
blockdev-add don't get one.

A few places assume a block backend always has a DriveInfo.  Fix them
up.

Signed-off-by: Markus Armbruster 
---
 blockdev.c  | 10 ++
 hw/block/block.c| 16 ++--
 hw/ide/qdev.c   |  2 +-
 hw/scsi/scsi-disk.c |  2 +-
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 4488dfd..12906a6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -285,7 +285,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 int on_read_error, on_write_error;
 BlockBackend *blk;
 BlockDriverState *bs;
-DriveInfo *dinfo;
 ThrottleConfig cfg;
 int snapshot = 0;
 bool copy_on_read;
@@ -457,9 +456,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 bdrv_set_io_limits(bs, &cfg);
 }
 
-dinfo = g_malloc0(sizeof(*dinfo));
-blk_set_legacy_dinfo(blk, dinfo);
-
 if (!file || !*file) {
 if (has_driver_specific_opts) {
 file = NULL;
@@ -902,21 +898,19 @@ DriveInfo *drive_new(QemuOpts *all_opts, 
BlockInterfaceType block_default_type)
 }
 
 /* Set legacy DriveInfo fields */
-dinfo = blk_legacy_dinfo(blk);
+dinfo = g_malloc0(sizeof(*dinfo));
 dinfo->enable_auto_del = true;
 dinfo->opts = all_opts;
-
 dinfo->cyls = cyls;
 dinfo->heads = heads;
 dinfo->secs = secs;
 dinfo->trans = translation;
-
 dinfo->type = type;
 dinfo->bus = bus_id;
 dinfo->unit = unit_id;
 dinfo->devaddr = devaddr;
-
 dinfo->serial = g_strdup(serial);
+blk_set_legacy_dinfo(blk, dinfo);
 
 switch(type) {
 case IF_IDE:
diff --git a/hw/block/block.c b/hw/block/block.c
index 0666dd3..a625773 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -19,7 +19,9 @@ void blkconf_serial(BlockConf *conf, char **serial)
 if (!*serial) {
 /* try to fall back to value set with legacy -drive serial=... */
 dinfo = blk_legacy_dinfo(conf->blk);
-*serial = g_strdup(dinfo->serial);
+if (dinfo) {
+*serial = g_strdup(dinfo->serial);
+}
 }
 }
 
@@ -32,11 +34,13 @@ void blkconf_geometry(BlockConf *conf, int *ptrans,
 if (!conf->cyls && !conf->heads && !conf->secs) {
 /* try to fall back to value set with legacy -drive cyls=... */
 dinfo = blk_legacy_dinfo(conf->blk);
-conf->cyls  = dinfo->cyls;
-conf->heads = dinfo->heads;
-conf->secs  = dinfo->secs;
-if (ptrans) {
-*ptrans = dinfo->trans;
+if (dinfo) {
+conf->cyls  = dinfo->cyls;
+conf->heads = dinfo->heads;
+conf->secs  = dinfo->secs;
+if (ptrans) {
+*ptrans = dinfo->trans;
+}
 }
 }
 if (!conf->cyls && !conf->heads && !conf->secs) {
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 4818334..a74c81e 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -206,7 +206,7 @@ static int ide_drive_initfn(IDEDevice *dev)
 {
 DriveInfo *dinfo = blk_legacy_dinfo(dev->conf.blk);
 
-return ide_dev_initfn(dev, dinfo->media_cd ? IDE_CD : IDE_HD);
+return ide_dev_initfn(dev, dinfo && dinfo->media_cd ? IDE_CD : IDE_HD);
 }
 
 #define DEFINE_IDE_DEV_PROPERTIES() \
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 768c1ad..e0c5ed3 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2331,7 +2331,7 @@ static void scsi_disk_realize(SCSIDevice *dev, Error 
**errp)
 }
 
 dinfo = blk_legacy_dinfo(dev->conf.blk);
-if (dinfo->media_cd) {
+if (dinfo && dinfo->media_cd) {
 scsi_cd_realize(dev, errp);
 } else {
 scsi_hd_realize(dev, errp);
-- 
1.9.3

Re: [Qemu-devel] [PATCH v5 05/22] block: Convert bdrv_em_aiocb_info.cancel to .cancel_async

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 07:59, Fam Zheng ha scritto:
> @@ -4679,6 +4679,9 @@ static void bdrv_aio_cancel_em(BlockDriverAIOCB 
> *blockacb)
>  {
>  BlockDriverAIOCBSync *acb =
>  container_of(blockacb, BlockDriverAIOCBSync, common);
> +
> +acb->ret = -ECANCELED;
> +acb->common.cb(acb->common.opaque, acb->ret);
>  qemu_bh_delete(acb->bh);
>  acb->bh = NULL;
>  qemu_aio_release(acb);

This could call the callback before I/O is finished.  I/O can then
complete and write to disk stuff that was not meant to be written.

I think there is a pre-existing bug, which should be fixed with a "bool
*done" member similar to BlockDriverAIOCBCoroutine's.  But for the sake
of conversion to async cancellation, you can just empty bdrv_aio_cancel_em.

Paolo

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 01:44:49PM +0530, Nikunj A Dadhania wrote:
> Greg Kurz  writes:
> 
> > On Mon, 8 Sep 2014 19:05:02 +0300
> > "Michael S. Tsirkin"  wrote:
> >
> >> commit cc943c36faa192cd4b32af8fe5edb31894017d35
> >> pci: Use bus master address space for delivering MSI/MSI-X messages
> >> breaks virtio-net for rhel6.[56] x86 guests because they don't
> >> enable bus mastering for virtio PCI devices
> >> 
> >> Old guests forgot to enable bus mastering, enable it
> >> automatically on DRIVER_OK.
> >> 
> >> Note: we should either back out the original patch from
> >> stable or apply this one on top.
> >> 
> >> Cc: qemu-sta...@nongnu.org
> >> Reported-by: Greg Kurz 
> >> Signed-off-by: Jan Kiszka 
> >> Signed-off-by: Michael S. Tsirkin 
> >> ---
> >>  hw/virtio/virtio-pci.c | 2 ++
> >>  1 file changed, 2 insertions(+)
> >> 
> >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> >> index ddb5da1..af937d2 100644
> >> --- a/hw/virtio/virtio-pci.c
> >> +++ b/hw/virtio/virtio-pci.c
> >> @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> >> addr, uint32_t val)
> >>  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
> >>  !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> >>  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> >> +
> >> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> >> +  true);
> >>  }
> >>  break;
> >>  case VIRTIO_MSI_CONFIG_VECTOR:
> >
> > Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel expertise.
> >
> > Michael,
> >
> > This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
> > Unfortunately,
> > this fails for rhel6.5 ppc64 because it is never called... 
> 
> > I did some debugging: it looks like the guest kernel calls the OF
> > quiesce call to flush pending DMA and disables bus master on the
> > virtio-blk device (PCI_COMMAND == 0x3).
> 
> Getting confused, above you are talking about virtio-net and here it is
> virtio-blk.
> 
> Anyway, the routine remains the same for both of them.  From SLOF
> during init we set DRIVER_OK, and after using the device, during the
> quiesce called from the Linux kernel, VIRTIO_CONFIG_S_FAILED is set and
> then a VIRTIO_DEVICE_RESET is done.

BTW, you really should start enabling bus mastering rather than relying
on the work-around we have for broken guests.

> > The guest then continues to boot and hangs... It appears that waiting
> > for the guest to issue VIRTIO_CONFIG_S_DRIVER_OK is not enough. Since
> > we need this for MSI to work, I tried the following and it fixes the
> > issue:
> >
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index af937d2..3d72aa8 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -111,9 +111,14 @@ static void virtio_pci_notify(DeviceState *d, uint16_t 
> > vector)
> >  {
> >  VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
> >  
> > -if (msix_enabled(&proxy->pci_dev))
> > +if (msix_enabled(&proxy->pci_dev)) {
> > +if (!(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> > +proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> > +
> > memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> > +  true);
> > +}
> >  msix_notify(&proxy->pci_dev, vector);
> > -else {
> > +} else {
> >  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> >  pci_set_irq(&proxy->pci_dev, vdev->isr & 1);
> >  }
> >
> > If this is acceptable, I'll make it a helper and squash it into your patch.
> >
> > Thoughts ?
> >
> > -- 
> > Gregory Kurz kurzg...@fr.ibm.com
> >  gk...@linux.vnet.ibm.com
> > Software Engineer @ IBM/Meiosys  http://www.ibm.com
> > Tel +33 (0)562 165 496
> >
> > "Anarchy is about taking complete responsibility for yourself."
> > Alan Moore.

[Qemu-devel] [PATCH 22/23] block: Lift device model API into BlockBackend

2014-09-10 Thread Markus Armbruster

Move device model attachment / detachment and the BlockDevOps device
model callbacks and their wrappers from BlockDriverState to
BlockBackend.

Signed-off-by: Markus Armbruster 
---
 block.c| 126 +
 block/block-backend.c  | 107 ++
 block/qapi.c   |   8 +--
 blockdev.c |   8 +--
 include/block/block.h  |  45 ---
 include/block/block_int.h  |  12 ++--
 include/sysemu/block-backend.h |  35 
 7 files changed, 159 insertions(+), 182 deletions(-)

diff --git a/block.c b/block.c
index f71b87c..e81087f 100644
--- a/block.c
+++ b/block.c
@@ -58,9 +58,6 @@ struct BdrvDirtyBitmap {
 
 #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
 
-#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
-
-static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
 static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 BlockCompletionFunc *cb, void *opaque);
@@ -1516,7 +1513,9 @@ int bdrv_open(BlockDriverState **pbs, const char 
*filename,
 }
 
 if (!bdrv_key_required(bs)) {
-bdrv_dev_change_media_cb(bs, true);
+if (bs->blk) {
+blk_dev_change_media_cb(bs->blk, true);
+}
 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
&& !runstate_check(RUN_STATE_INMIGRATE)
&& !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
@@ -1841,7 +1840,9 @@ void bdrv_close(BlockDriverState *bs)
 }
 }
 
-bdrv_dev_change_media_cb(bs, false);
+if (bs->blk) {
+blk_dev_change_media_cb(bs->blk, false);
+}
 
 /*throttling disk I/O limits*/
 if (bs->io_limits_enabled) {
@@ -1949,9 +1950,6 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 /* move some fields that need to stay attached to the device */
 
 /* dev info */
-bs_dest->dev_ops= bs_src->dev_ops;
-bs_dest->dev_opaque = bs_src->dev_opaque;
-bs_dest->dev= bs_src->dev;
 bs_dest->guest_block_size   = bs_src->guest_block_size;
 bs_dest->copy_on_read   = bs_src->copy_on_read;
 
@@ -2017,7 +2015,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 assert(!bs_new->blk);
 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
 assert(bs_new->job == NULL);
-assert(bs_new->dev == NULL);
 assert(bs_new->io_limits_enabled == false);
 assert(!throttle_have_timer(&bs_new->throttle_state));
 
@@ -2034,7 +2031,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old)
 assert(!bs_new->blk);
 
 /* Check a few fields that should remain attached to the device */
-assert(bs_new->dev == NULL);
 assert(bs_new->job == NULL);
 assert(bs_new->io_limits_enabled == false);
 assert(!throttle_have_timer(&bs_new->throttle_state));
@@ -2073,7 +2069,6 @@ void bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top)
 
 static void bdrv_delete(BlockDriverState *bs)
 {
-assert(!bs->dev);
 assert(!bs->job);
 assert(bdrv_op_blocker_is_empty(bs));
 assert(!bs->refcnt);
@@ -2087,105 +2082,6 @@ static void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }
 
-int bdrv_attach_dev(BlockDriverState *bs, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-if (bs->dev) {
-return -EBUSY;
-}
-bs->dev = dev;
-bdrv_iostatus_reset(bs);
-
-/* We're expecting I/O from the device so bump up coroutine pool size */
-qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
-return 0;
-}
-
-/* TODO qdevified devices don't use this, remove when devices are qdevified */
-void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
-{
-if (bdrv_attach_dev(bs, dev) < 0) {
-abort();
-}
-}
-
-void bdrv_detach_dev(BlockDriverState *bs, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-assert(bs->dev == dev);
-bs->dev = NULL;
-bs->dev_ops = NULL;
-bs->dev_opaque = NULL;
-bs->guest_block_size = 512;
-qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
-}
-
-/* TODO change to return DeviceState * when all users are qdevified */
-void *bdrv_get_attached_dev(BlockDriverState *bs)
-{
-return bs->dev;
-}
-
-void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
-  void *opaque)
-{
-bs->dev_ops = ops;
-bs->dev_opaque = opaque;
-}
-
-static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
-{
-if (bs->dev_ops && bs->dev_ops->change_media_cb) {
-bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
-bs->dev_ops->change_media_cb(bs->dev_opaque, load);
-if (tray_was_closed) {
-/* tray open */
-qapi_event_send_devic

[Qemu-devel] [PATCH 11/23] block: Rename BlockDriverAIOCB* to BlockAIOCB*

2014-09-10 Thread Markus Armbruster

I'll use BlockDriverAIOCB with block backends shortly, and the name is
going to fit badly there.  It's a block layer thing anyway, not just a
block driver thing.

Signed-off-by: Markus Armbruster 
---
 block-migration.c   |   2 +-
 block.c | 151 ++--
 block/archipelago.c |  30 -
 block/backup.c  |   2 +-
 block/blkdebug.c|  22 +++
 block/blkverify.c   |  20 +++---
 block/commit.c  |   2 +-
 block/curl.c|   8 +--
 block/iscsi.c   |   8 +--
 block/linux-aio.c   |   8 +--
 block/mirror.c  |   6 +-
 block/qed-gencb.c   |   4 +-
 block/qed-table.c   |  10 +--
 block/qed.c |  46 +++---
 block/qed.h |  12 ++--
 block/quorum.c  |  38 +--
 block/raw-aio.h |   8 +--
 block/raw-posix.c   |  32 +-
 block/raw-win32.c   |  16 ++---
 block/raw_bsd.c |   8 +--
 block/rbd.c |  58 -
 block/sheepdog.c|   4 +-
 block/stream.c  |   2 +-
 block/win32-aio.c   |   8 +--
 blockjob.c  |   4 +-
 dma-helpers.c   |  24 +++
 hw/block/nvme.h |   2 +-
 hw/ide/ahci.c   |   2 +-
 hw/ide/ahci.h   |   2 +-
 hw/ide/core.c   |  12 ++--
 hw/ide/internal.h   |  12 ++--
 hw/ide/macio.c  |   2 +-
 hw/ide/pci.c|   2 +-
 hw/ide/pci.h|   2 +-
 hw/ppc/mac.h|   2 +-
 hw/scsi/scsi-generic.c  |   2 +-
 include/block/aio.h |  12 ++--
 include/block/block.h   |  36 +--
 include/block/block_int.h   |  30 -
 include/block/blockjob.h|   4 +-
 include/block/thread-pool.h |   4 +-
 include/hw/scsi/scsi.h  |   2 +-
 include/monitor/monitor.h   |   4 +-
 include/sysemu/dma.h|  26 
 monitor.c   |   6 +-
 tests/test-thread-pool.c|   2 +-
 thread-pool.c   |   8 +--
 47 files changed, 353 insertions(+), 354 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index da30e93..08db01a 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -72,7 +72,7 @@ typedef struct BlkMigBlock {
 int nr_sectors;
 struct iovec iov;
 QEMUIOVector qiov;
-BlockDriverAIOCB *aiocb;
+BlockAIOCB *aiocb;
 
 /* Protected by block migration lock.  */
 int ret;
diff --git a/block.c b/block.c
index 34c8f8c..f71b87c 100644
--- a/block.c
+++ b/block.c
@@ -61,12 +61,12 @@ struct BdrvDirtyBitmap {
 #define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
 
 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
-static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+BlockCompletionFunc *cb, void *opaque);
+static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-BlockDriverCompletionFunc *cb, void *opaque);
+BlockCompletionFunc *cb, void *opaque);
 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
  int64_t sector_num, int nb_sectors,
  QEMUIOVector *iov);
@@ -79,14 +79,14 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState 
*bs,
 static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
 BdrvRequestFlags flags);
-static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
-   int64_t sector_num,
-   QEMUIOVector *qiov,
-   int nb_sectors,
-   BdrvRequestFlags flags,
-   BlockDriverCompletionFunc *cb,
-   void *opaque,
-   bool is_write);
+static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb,
+ void *opaque,
+ bool is_write);
 static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv

Re: [Qemu-devel] [PATCH v1 1/2] block/archipelago: Implement bdrv_truncate()

2014-09-10 Thread Chrysostomos Nanakos


On 09/10/2014 11:20 AM, Kevin Wolf wrote:

Am 09.09.2014 um 19:38 hat Chrysostomos Nanakos geschrieben:

Signed-off-by: Chrysostomos Nanakos 
---
  block/archipelago.c |   63 +--
  1 file changed, 61 insertions(+), 2 deletions(-)

Thanks, applied all to the block branch.

Can you please add a cover letter (the [PATCH 0/n] mail as produced by
the --cover-letter option in git format-patch) for your next series?

Kevin
Yes, of course. I removed it at the last minute, thinking that it was only
two commits and there was no need for a cover letter.


Thanks.

Regards,
Chrysostomos.

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 12:35:32AM +0200, Greg Kurz wrote:
> On Mon, 8 Sep 2014 19:05:02 +0300
> "Michael S. Tsirkin"  wrote:
> 
> > commit cc943c36faa192cd4b32af8fe5edb31894017d35
> > pci: Use bus master address space for delivering MSI/MSI-X messages
> > breaks virtio-net for rhel6.[56] x86 guests because they don't
> > enable bus mastering for virtio PCI devices
> > 
> > Old guests forgot to enable bus mastering, enable it
> > automatically on DRIVER_OK.
> > 
> > Note: we should either back out the original patch from
> > stable or apply this one on top.
> > 
> > Cc: qemu-sta...@nongnu.org
> > Reported-by: Greg Kurz 
> > Signed-off-by: Jan Kiszka 
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> >  hw/virtio/virtio-pci.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index ddb5da1..af937d2 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> > addr, uint32_t val)
> >  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
> >  !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> >  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> > +
> > memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> > +  true);
> >  }
> >  break;
> >  case VIRTIO_MSI_CONFIG_VECTOR:
> 
> Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel expertise.
> 
> Michael,
> 
> This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
> Unfortunately,
> this fails for rhel6.5 ppc64 because it is never called... I did some 
> debugging:
> it looks like the guest kernel calls the OF quiesce call to flush pending DMA
> and disables bus master on the virtio-blk device (PCI_COMMAND == 0x3). The
> guest then continues to boot and hangs... It appears that waiting for the
> guest to issue VIRTIO_CONFIG_S_DRIVER_OK is not enough.

Got it. Writing to PCI_COMMAND disabled bus mastering again.
This should do it, on top - can you confirm please?

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index af937d2..6b7ac39 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -475,10 +475,14 @@ static void virtio_write_config(PCIDevice *pci_dev, 
uint32_t address,
 pci_default_write_config(pci_dev, address, val, len);
 
 if (range_covers_byte(address, len, PCI_COMMAND) &&
-!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
-!(proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG)) {
-virtio_pci_stop_ioeventfd(proxy);
-virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
+!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
+if (proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG) {
+memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
+  true);
+} else {
+virtio_pci_stop_ioeventfd(proxy);
+virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
+}
 }
 }

Re: [Qemu-devel] [PATCH v1 1/2] block/archipelago: Implement bdrv_truncate()

2014-09-10 Thread Kevin Wolf

Am 09.09.2014 um 19:38 hat Chrysostomos Nanakos geschrieben:
> Signed-off-by: Chrysostomos Nanakos 
> ---
>  block/archipelago.c |   63 
> +--
>  1 file changed, 61 insertions(+), 2 deletions(-)

Thanks, applied all to the block branch.

Can you please add a cover letter (the [PATCH 0/n] mail as produced by
the --cover-letter option in git format-patch) for your next series?

Kevin

Re: [Qemu-devel] [PATCH 0/3] build-sys: Exclude empty object files when linking libqemuutil.a

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 08:19, Fam Zheng ha scritto:
> Paolo,
> 
> Since Stefan reviewed the first patch, would you apply this series?

Yes, pull request coming soon.

Paolo

Re: [Qemu-devel] [PATCH v1 1/2] block/archipelago: Implement bdrv_truncate()

2014-09-10 Thread Kevin Wolf

Am 10.09.2014 um 10:25 hat Chrysostomos Nanakos geschrieben:
> On 09/10/2014 11:20 AM, Kevin Wolf wrote:
> >Am 09.09.2014 um 19:38 hat Chrysostomos Nanakos geschrieben:
> >>Signed-off-by: Chrysostomos Nanakos 
> >>---
> >>  block/archipelago.c |   63 
> >> +--
> >>  1 file changed, 61 insertions(+), 2 deletions(-)
> >Thanks, applied all to the block branch.
> >
> >Can you please add a cover letter (the [PATCH 0/n] mail as produced by
> >the --cover-letter option in git format-patch) for your next series?
> >
> >Kevin
> Yes of course, removed it in the last minute thinking that it was
> only two commits and there was no need for a cover letter.

No big deal, but besides having combined diffstats, it also allows
distinguishing between comments on the first patch and comments on the
whole series, so I prefer to have it when it's more than one patch.

Of course, you don't have to actually write a description for the cover
letter for such a small series, the automatically generated stuff is
good enough.

Kevin

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Paolo Bonzini

Il 09/09/2014 22:51, Michael S. Tsirkin ha scritto:
> > i440FX/PIIX3 state is loaded before i8259, so the interrupt will never
> > be in the i8259 ISR.  I am not sure why it is a problem for
> > record/replay, but I think it's plausible to consider this a bug.  i8259
> > state should not be affected by the load of PIIX3 state, since i8259 is
> > migrated separately.
> 
> Sorry I still don't understand. Why do stuff from vmstate callback then?
> How is it different?

Reconstructing internal state from post_load is okay.

What is not okay (and I think it should be a rule) is to touch other
devices from post_load, unless you know that they are deserialized
first.  For example it's okay for a PCI device to talk to the parent
bridge in its post_load function.

In the case of PIIX3 vs. i8259, however, you know that i8259 is
deserialized _last_ because i8259 is an ISA device and PIIX3 provides
the ISA bus.  So it's incorrect, even though it's currently harmless, to
touch the i8259 before it's deserialized.

> I'd like to see a description of a scenario where this patch makes
> a difference.

Of course it would be nice to have testcases for this, but I guess one
case could be:

- LAPIC configured in ExtINT mode

- interrupts are masked in the i8259, but the i8259 doesn't know that
yet because it's not been loaded yet

- the PIIX3 loads the state and the interrupt is set.  pic_set_irq is
called, calls pic_update_irq

- pic_update_irq calls pic_get_irq, which uses IMR=0 and thus raises LINT0

- the APIC has been loaded already, so LINT0 is injected incorrectly


Another case could be:

- i8259 is processing IRQ0.  The lower-priority interrupt from PIIX3 is
in IRR.  Machine is migrated.

- the PIIX3 loads the state and sets the interrupt in the i8259.
pic_set_irq is called, calls pic_update_irq, calls pic_get_irq

- because i8259 has not been loaded yet, pic_get_irq sees ISR=0 and the
interrupt is injected even though IRQ0 (higher priority) is being serviced.


In both cases, the saved i8259 state will have the PIIX3 interrupt in
IRR, so the interrupt is not lost, just held (as it would have been on
the source machine).

Paolo

[Qemu-devel] [RESEND RFC PATCH v2] Add HMP command "info memory-devices"

2014-09-10 Thread Zhu Guihua

This patch provides the HMP equivalent of the QMP query-memory-devices command.
With the command "info memory-devices", the user can see all information about
hotpluggable memory devices, such as their id. Given the id of a device,
hot-removing hotpluggable memory devices becomes possible with the command
'device_del'.

Change log v1 -> v2:
1. fix a bug where info->dimm was accessed when MemoryDeviceInfo is not a
PCDIMMDevice.
2. use the enum to replace "dimm", and look up the type name in
MemoryDeviceInfoKind_lookup[] instead of open-coding it.

Signed-off-by: Zhu Guihua 
---
 hmp-commands.hx |  2 ++
 hmp.c   | 43 +++
 hmp.h   |  1 +
 monitor.c   |  7 +++
 4 files changed, 53 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index f859f8d..0b1a4f7 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1778,6 +1778,8 @@ show qdev device model list
 show roms
 @item info tpm
 show the TPM device
+@item info memory-devices
+show the memory devices
 @end table
 ETEXI
 
diff --git a/hmp.c b/hmp.c
index 40a90da..93c1dfe 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1718,3 +1718,46 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict)
 
 qapi_free_MemdevList(memdev_list);
 }
+
+void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
+{
+Error *err = NULL;
+MemoryDeviceInfoList *list = qmp_query_memory_devices(&err);
+MemoryDeviceInfoList *elem = list;
+MemoryDeviceInfo *info;
+PCDIMMDeviceInfo *di;
+int i = 0;
+
+while (elem) {
+info = elem->value;
+
+if (info) {
+switch (info->kind) {
+case MEMORY_DEVICE_INFO_KIND_DIMM:
+di = info->dimm;
+
+monitor_printf(mon, "MemoryDevice %d\n", i);
+monitor_printf(mon, "  %s\n",
+   
MemoryDeviceInfoKind_lookup[MEMORY_DEVICE_INFO_KIND_DIMM]);
+monitor_printf(mon, "  id: %s\n", di->id);
+monitor_printf(mon, "  addr: %" PRId64 "\n", di->addr);
+monitor_printf(mon, "  slot: %" PRId64 "\n", di->slot);
+monitor_printf(mon, "  node: %" PRId64 "\n", di->node);
+monitor_printf(mon, "  size: %" PRId64 "\n", di->size);
+monitor_printf(mon, "  memdev: %s\n", di->memdev);
+monitor_printf(mon, "  hotplugged: %s\n",
+   di->hotplugged ? "true" : "false");
+monitor_printf(mon, "  hotpluggable: %s\n",
+   di->hotpluggable ? "true" : "false");
+break;
+default:
+break;
+}
+}
+
+elem = elem->next;
+i++;
+}
+
+qapi_free_MemoryDeviceInfoList(list);
+}
diff --git a/hmp.h b/hmp.h
index 4fd3c4a..4bb5dca 100644
--- a/hmp.h
+++ b/hmp.h
@@ -94,6 +94,7 @@ void hmp_cpu_add(Monitor *mon, const QDict *qdict);
 void hmp_object_add(Monitor *mon, const QDict *qdict);
 void hmp_object_del(Monitor *mon, const QDict *qdict);
 void hmp_info_memdev(Monitor *mon, const QDict *qdict);
+void hmp_info_memory_devices(Monitor *mon, const QDict *qdict);
 void object_add_completion(ReadLineState *rs, int nb_args, const char *str);
 void object_del_completion(ReadLineState *rs, int nb_args, const char *str);
 void device_add_completion(ReadLineState *rs, int nb_args, const char *str);
diff --git a/monitor.c b/monitor.c
index 34cee74..fe88e0d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2921,6 +2921,13 @@ static mon_cmd_t info_cmds[] = {
 .mhandler.cmd = hmp_info_memdev,
 },
 {
+.name   = "memory-devices",
+.args_type  = "",
+.params = "",
+.help   = "show memory devices",
+.mhandler.cmd = hmp_info_memory_devices,
+},
+{
 .name   = NULL,
 },
 };
-- 
1.9.3

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Peter Maydell

On 10 September 2014 09:38, Paolo Bonzini  wrote:
> Il 09/09/2014 22:51, Michael S. Tsirkin ha scritto:
>> Sorry I still don't understand. Why do stuff from vmstate callback then?
>> How is it different?
>
> Reconstructing internal state from post_load is okay.
>
> What is not okay (and I think it should be a rule) is to touch other
> devices from post_load, unless you know that they are deserialized
> first.  For example it's okay for a PCI device to talk to the parent
> bridge in its post_load function.

I don't think it's right to talk to another device even if you do
know it's deserialized first. Talking to it might make it change
its state, which would be wrong (since its correct state is
the state it's just deserialized). I would suggest the rule should
be "never do something that can change the state of another
device in post-load".

(We have similar issues with reset, except worse in that we
don't have a coherent rule to cause everything to come out
of reset in the right state.)

thanks
-- PMM

Re: [Qemu-devel] [PATCH] block: Make op blockers recursive

2014-09-10 Thread Fam Zheng

On Thu, 09/04 21:42, Stefan Hajnoczi wrote:
> On Tue, Aug 26, 2014 at 06:45:54AM +, Benoît Canet wrote:
> > On Tue, Aug 26, 2014 at 12:42:04PM +0800, Fam Zheng wrote:
> > > On Mon, 08/25 12:12, Benoît Canet wrote:
> > > > On Mon, Aug 25, 2014 at 05:37:37PM +0800, Fam Zheng wrote:
> > > > > On Mon, 08/25 09:06, Benoît Canet wrote:
> > > > > > On Mon, Aug 25, 2014 at 02:04:24PM +0800, Fam Zheng wrote:
> > > > > > > On Fri, 08/22 18:11, Benoît Canet wrote:
> > > > > > > > Since the block layer code is starting to modify the BDS graph 
> > > > > > > > right in the
> > > > > > > > middle of BDS chains (block-mirror's replace parameter for 
> > > > > > > > example) QEMU needs
> > > > > > > > to properly block and unblock whole BDS subtrees; recursion is 
> > > > > > > > a neat way to
> > > > > > > > achieve this task.
> > > > > > > > 
> > > > > > > > This patch also takes care of modifying the op blockers users.
> > > > > > > 
> > > > > > > Is this going to replace backing_blocker?
> > > > > > > 
> > > > > > > I think it is too general an approach to control the operation 
> > > > > > > properly,
> > > > > > > because the op blocker may not work in the same way for all types 
> > > > > > > of BDS
> > > > > > > connections.  In other words, the choosing of op blockers are 
> > > > > > > likely
> > > > > > > conditional on graph edge types, that's why backing_blocker was 
> > > > > > > added here. For
> > > > > > > example, A VMDK extent connection will probably need a different 
> > > > > > > set of
> > > > > > > blockers than bs->file connection.
> > > > > > > 
> > > > > > > So could you explain in which cases is the recursive 
> > > > > > > blocking/unblocking
> > > > > > > useful?
> > > > > > 
> > > > > > It's designed for the new crop of block operations operating on BDS 
> > > > > > located in
> > > > > > the middle of the backing chain: Jeff's patches, intermediate live 
> > > > > > streaming or
> > > > > > intermediate mirroring.
> > > > > > Recursively blocking BDS allows to do these operations safely.
> > > > > 
> > > > > Sorry I may be slow on this, but it's still not clear to me.
> > > > > 
> > > > > That doesn't immediately show how backing_blocker doesn't work. These
> > > > > operations are in the category of operations that update graph 
> > > > > topology,
> > > > > meaning that they drop, add or swap some nodes in the middle of the 
> > > > > chain. It
> > > > > is not safe because there are used by the other nodes, but they are 
> > > > > supposed to
> > > > > be protected by backing_blocker. Could you be more specific?
> > > > 
> > > > I don't know particularly about the backing blocker case.
> > > > 
> > > > > 
> > > > > I can think of something more than backing_hd: there are also link 
> > > > > types other
> > > > > than backing_hd, for example ->file, (vmdk)->extents or 
> > > > > (quorum)->qcrs, etc.
> > > > > They should be protected as well.
> > > > 
> > > > This patch takes cares of recursing everywhere.
> > > > 
> > > > I can give you an example for quorum.
> > > > 
> > > > If a streaming operation is running on a quorum block backend the 
> > > > recursive
> > > > blocking will help to block any operation done directly on any of the 
> > > > children.
> > > 
> > > At what points should block layer recursively block/unblock the 
> > > operations in
> > > this quorum case?
> > 
> > When the streaming starts it should block all the top bs children.
> > So after when an operation tries to operate on a child of the top bs it 
> > will be
> > forbidden.
> > 
> > The beauty of it is that recursive blockers can easily replace regular 
> > blockers.
> 
> Let's think of a situation that recursive blockers protect but
> backing_blocker does not:
> 
> a <- b <- c <- d
> 
> c is the backing file and is therefore protected by the op blocker.
> 
> The block-commit command works with node-names, however, so we can
> manipulate any nodes in the graph, not just the topmost one.  Try this:
> 
> block-commit d
> block-commit b
> 
> I haven't checked yet but I suspect it will launch two block-commit jobs
> on the same partial chain (that's a bad thing because it can lead to
> corruption).

1) Does block-commit work with node-names already? In other words, is
   block-commit b possible now? I only see drive-mirror works with it, but not
   drive-backup, block-mirror or block-commit.

2) Regardless of the answer to 1), I think we could use a similar approach as
   drive-backup here: split BLOCK_OP_TYPE_COMMIT to
   BLOCK_OP_TYPE_COMMIT_{SOURCE,TARGET}, and only unblock
   BLOCK_OP_TYPE_COMMIT_TARGET in bdrv_set_backing_hd.

Unblocking BLOCK_OP_TYPE_COMMIT on backing_hd is wrong as long as we expose "d"
to block-commit, with node-name.

As the next step, let's think about what it takes to safely allow commit d to b
with:

a <- b <- c <- d <- e <- f <- g

I think the answer is: if the commit job checks and sets blockers on range [d, 
b],
it is safe. Because from e, f and g's points of view, the whole bac

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Greg Kurz

On Wed, 10 Sep 2014 13:44:49 +0530
Nikunj A Dadhania  wrote:
> Greg Kurz  writes:
> 
> > On Mon, 8 Sep 2014 19:05:02 +0300
> > "Michael S. Tsirkin"  wrote:
> >
> >> commit cc943c36faa192cd4b32af8fe5edb31894017d35
> >> pci: Use bus master address space for delivering MSI/MSI-X messages
> >> breaks virtio-net for rhel6.[56] x86 guests because they don't
> >> enable bus mastering for virtio PCI devices
> >> 
> >> Old guests forgot to enable bus mastering, enable it
> >> automatically on DRIVER_OK.
> >> 
> >> Note: we should either back out the original patch from
> >> stable or apply this one on top.
> >> 
> >> Cc: qemu-sta...@nongnu.org
> >> Reported-by: Greg Kurz 
> >> Signed-off-by: Jan Kiszka 
> >> Signed-off-by: Michael S. Tsirkin 
> >> ---
> >>  hw/virtio/virtio-pci.c | 2 ++
> >>  1 file changed, 2 insertions(+)
> >> 
> >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> >> index ddb5da1..af937d2 100644
> >> --- a/hw/virtio/virtio-pci.c
> >> +++ b/hw/virtio/virtio-pci.c
> >> @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> >> addr, uint32_t val)
> >>  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
> >>  !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> >>  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> >> +
> >> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> >> +  true);
> >>  }
> >>  break;
> >>  case VIRTIO_MSI_CONFIG_VECTOR:
> >
> > Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel expertise.
> >
> > Michael,
> >
> > This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
> > Unfortunately,
> > this fails for rhel6.5 ppc64 because it is never called... 
> 
> > I did some debugging: it looks like the guest kernel calls the OF
> > quiesce call to flush pending DMA and disables bus master on the
> > virtio-blk device (PCI_COMMAND == 0x3).
> 
> Getting confused, above you are talking about virtio-net and here it is
> virtio-blk.
> 

I tried running rhel6.5 (old kernel that doesn't enable bus mastering on
virtio PCI devices), with a virtio-blk based disk and a virtio-net based
NIC for both x86_64 and ppc64. Results are as follows:
- x86_64: boots well but fails to activate network
- ppc64: does not boot because the virtio-blk notification doesn't
 reach the guest

> Anyways, the routines still remains same for both of them.  From SLOF
> during init we set DRIVER_OK, and after using the device during the
> quiesce, called from linux kernel VIRTIO_CONFIG_S_FAILED is set and then
> a VIRTIO_DEVICE_RESET is done.
> 

I chose to debug by attaching gdb to qemu-system-ppc64 itself. It appears
that SLOF enables bus mastering during init, but at some point bus
mastering gets disabled... Unfortunately my SLOF knowledge is limited and I
don't know exactly what's happening in the guest between SLOF and the
kernel.

> > The guest then continues to boot and hangs... It appears that waiting
> > for the guest to issue VIRTIO_CONFIG_S_DRIVER_OK is not enough. Since
> > we need this for MSI to work, I tried the following and it fixes the
> > issue:
> >
> > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > index af937d2..3d72aa8 100644
> > --- a/hw/virtio/virtio-pci.c
> > +++ b/hw/virtio/virtio-pci.c
> > @@ -111,9 +111,14 @@ static void virtio_pci_notify(DeviceState *d, uint16_t 
> > vector)
> >  {
> >  VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
> >  
> > -if (msix_enabled(&proxy->pci_dev))
> > +if (msix_enabled(&proxy->pci_dev)) {
> > +if (!(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> > +proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> > +
> > memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> > +  true);
> > +}
> >  msix_notify(&proxy->pci_dev, vector);
> > -else {
> > +} else {
> >  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> >  pci_set_irq(&proxy->pci_dev, vdev->isr & 1);
> >  }
> >
> > If this is acceptable, I'll make it a helper and squash it into your patch.
> >
> > Thoughts ?
> >
> > -- 
> > Gregory Kurz kurzg...@fr.ibm.com
> >  gk...@linux.vnet.ibm.com
> > Software Engineer @ IBM/Meiosys  http://www.ibm.com
> > Tel +33 (0)562 165 496
> >
> > "Anarchy is about taking complete responsibility for yourself."
> > Alan Moore.



-- 
Gregory Kurz kurzg...@fr.ibm.com
 gk...@linux.vnet.ibm.com
Software Engineer @ IBM/Meiosys  http://www.ibm.com
Tel +33 (0)562 165 496

"Anarchy is about taking complete responsibility for yourself."
Alan Moore.

Re: [Qemu-devel] ballooning not working on hotplugged pc-dimm

2014-09-10 Thread zhanghailiang


On 2014/9/9 11:05, Alexandre DERUMIER wrote:

Hello,

I was playing with pc-dimm hotplug, and I noticed that ballooning is not working
on the memory space of pc-dimm devices.

example:

qemu -m size=1024,slots=255,maxmem=15000M

#free -m : 1024M
->  qmp balloon 512M
#free -m : 512M

->  hotplug pc-dimm 1G:

#free -m : 1512M


(This is the same behavior if qemu is started with pc-dimm devices)


qemu 2.1
Guest kernel : 3.12.



Does it need a guest balloon module update ?

Regards,

Alexandre Derumier




Hi,

I noticed this fault as well ;-). This is a bug in QEMU, and some work is in progress...

Actually, in QEMU the hotplugged memory (named hotplug-memory) is distinguished
from the common memory (named pc.ram). When performing a balloon action, QEMU
calculates the ram_size incorrectly.

I will send a patch series which also includes other bug fixes for memory
hotplug.

Thanks,
zhanghailiang

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Greg Kurz

On Wed, 10 Sep 2014 12:32:30 +0300
"Michael S. Tsirkin"  wrote:

> On Wed, Sep 10, 2014 at 01:44:49PM +0530, Nikunj A Dadhania wrote:
> > Greg Kurz  writes:
> > 
> > > On Mon, 8 Sep 2014 19:05:02 +0300
> > > "Michael S. Tsirkin"  wrote:
> > >
> > >> commit cc943c36faa192cd4b32af8fe5edb31894017d35
> > >> pci: Use bus master address space for delivering MSI/MSI-X messages
> > >> breaks virtio-net for rhel6.[56] x86 guests because they don't
> > >> enable bus mastering for virtio PCI devices
> > >> 
> > >> Old guests forgot to enable bus mastering, enable it
> > >> automatically on DRIVER_OK.
> > >> 
> > >> Note: we should either back out the original patch from
> > >> stable or apply this one on top.
> > >> 
> > >> Cc: qemu-sta...@nongnu.org
> > >> Reported-by: Greg Kurz 
> > >> Signed-off-by: Jan Kiszka 
> > >> Signed-off-by: Michael S. Tsirkin 
> > >> ---
> > >>  hw/virtio/virtio-pci.c | 2 ++
> > >>  1 file changed, 2 insertions(+)
> > >> 
> > >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > >> index ddb5da1..af937d2 100644
> > >> --- a/hw/virtio/virtio-pci.c
> > >> +++ b/hw/virtio/virtio-pci.c
> > >> @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, 
> > >> uint32_t addr, uint32_t val)
> > >>  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
> > >>  !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) 
> > >> {
> > >>  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> > >> +
> > >> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> > >> +  true);
> > >>  }
> > >>  break;
> > >>  case VIRTIO_MSI_CONFIG_VECTOR:
> > >
> > > Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel expertise.
> > >
> > > Michael,
> > >
> > > This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
> > > Unfortunately,
> > > this fails for rhel6.5 ppc64 because it is never called... 
> > 
> > > I did some debugging: it looks like the guest kernel calls the OF
> > > quiesce call to flush pending DMA and disables bus master on the
> > > virtio-blk device (PCI_COMMAND == 0x3).
> > 
> > Getting confused, above you are talking about virtio-net and here it is
> > virtio-blk.
> > 
> > Anyways, the routines still remains same for both of them.  From SLOF
> > during init we set DRIVER_OK, and after using the device during the
> > quiesce, called from linux kernel VIRTIO_CONFIG_S_FAILED is set and then
> > a VIRTIO_DEVICE_RESET is done.
> 
> BTW, you really should start enabling bus mastering, avoid relying
> on the work-around we have for broken guests.
> 

FWIW during my debug session, I see that SLOF enables bus mastering...
unfortunately, it gets disabled at some point after the guest kernel
is started (around the ppc64 prom_init() call).

> > > The guest then continues to boot and hangs... It appears that waiting
> > > for the guest to issue VIRTIO_CONFIG_S_DRIVER_OK is not enough. Since
> > > we need this for MSI to work, I tried the following and it fixes the
> > > issue:
> > >
> > > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > > index af937d2..3d72aa8 100644
> > > --- a/hw/virtio/virtio-pci.c
> > > +++ b/hw/virtio/virtio-pci.c
> > > @@ -111,9 +111,14 @@ static void virtio_pci_notify(DeviceState *d, 
> > > uint16_t vector)
> > >  {
> > >  VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
> > >  
> > > -if (msix_enabled(&proxy->pci_dev))
> > > +if (msix_enabled(&proxy->pci_dev)) {
> > > +if (!(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> > > +proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> > > +
> > > memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> > > +  true);
> > > +}
> > >  msix_notify(&proxy->pci_dev, vector);
> > > -else {
> > > +} else {
> > >  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> > >  pci_set_irq(&proxy->pci_dev, vdev->isr & 1);
> > >  }
> > >
> > > If this is acceptable, I'll make it a helper and squash it into your 
> > > patch.
> > >
> > > Thoughts ?
> > >
> > > -- 
> > > Gregory Kurz kurzg...@fr.ibm.com
> > >  gk...@linux.vnet.ibm.com
> > > Software Engineer @ IBM/Meiosys  http://www.ibm.com
> > > Tel +33 (0)562 165 496
> > >
> > > "Anarchy is about taking complete responsibility for yourself."
> > > Alan Moore.
> 



-- 
Gregory Kurz kurzg...@fr.ibm.com
 gk...@linux.vnet.ibm.com
Software Engineer @ IBM/Meiosys  http://www.ibm.com
Tel +33 (0)562 165 496

"Anarchy is about taking complete responsibility for yourself."
Alan Moore.

Re: [Qemu-devel] [PATCH] Fix typos and misspellings in comments

2014-09-10 Thread zhanghailiang


On 2014/9/9 21:43, Andreas Färber wrote:

Am 09.09.2014 10:09, schrieb Peter Maydell:

On 9 September 2014 04:23, zhanghailiang  wrote:

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2ab4460..bedef2f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -541,7 +541,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
  _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));

  /*
- * According to PAPR, rtas ibm,os-term, does not gaurantee a return
+ * According to PAPR, rtas ibm,os-term, does not guarantee a return


"..., rtas,os-term does not ..."? (i.e., drop the comma while at it)



OK, Thanks.


   * back to the guest cpu.
   *
   * While an additional ibm,extended-os-term property indicates that


OK


Regards,
Andreas

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 10:51, Peter Maydell ha scritto:
> > What is not okay (and I think it should be a rule) is to touch other
> > devices from post_load, unless you know that they are deserialized
> > first.  For example it's okay for a PCI device to talk to the parent
> > bridge in its post_load function.
> 
> I don't think it's right to talk to another device even if you do
> know it's deserialized first. Talking to it might make it change
> its state, which would be wrong (since its correct state is
> the state it's just deserialized). I would suggest the rule should
> be "never do something that can change the state of another
> device in post-load".

That's harder to do, but if it is possible to do it, it would be great
as well.

It would not surprise me to find a case where the parent device actually
_expects_ the children's post_load to inform it about something, instead
of serializing that part of state on its own.

Paolo

> (We have similar issues with reset, except worse in that we
> don't have a coherent rule to cause everything to come out
> of reset in the right state.)

Re: [Qemu-devel] [PATCH 1/6] hw/arm/virt: Provide flash devices for boot ROMs

2014-09-10 Thread Ard Biesheuvel

On 9 September 2014 20:20, Peter Maydell  wrote:
> On 5 September 2014 16:15, Ard Biesheuvel  wrote:
>> From: Peter Maydell 
>>
>> Add two flash devices to the virt board, so that it can be used for
>> running guests which want a bootrom image such as UEFI. We provide
>> two flash devices to make it more convenient to provide both a
>> read-only UEFI image and a read-write place to store guest-set
>> UEFI config variables. The '-bios' command line option is set up
>> to provide an image for the first of the two flash devices.
>>
>> Signed-off-by: Peter Maydell 
>> Reviewed-by: Paolo Bonzini 
>> ---
>>  hw/arm/virt.c | 70 
>> +++
>>  1 file changed, 70 insertions(+)
>
> This one's been around long enough that I'm going to add
> it to target-arm.next now.
>
> There were previously questions about whether we should
> have flash or RAM at the bottom, but I think it makes
> sense just to have a "like vexpress" config with two
> flash devices. This does make telling QEMU about backing
> storage for the 2nd flash a little complicated, but I
> think anybody seriously running a config like that will
> be using the management tools layer anyhow.
>

You mean having to use -pflash and pad the images out to 64 MB? I
wouldn't worry about that.

Re: [Qemu-devel] [PATCH v5 00/22] block: Asynchronous request cancellation

2014-09-10 Thread Bin Wu


On 2014/9/10 13:59, Fam Zheng wrote:

v5: Fix IDE callback. (Paolo)
 Fix blkdebug. (Paolo)
 Drop the DMA fix which is independent of this series. (Paolo)
 Incorporate Yuan's patch on quorum_aio_cancel. (Benoît)
 Commit message wording fix. (Benoît)
 Rename qemu_aio_release to qemu_aio_unref. (Benoît)

v4: Drop AIOCBInfo.cancel.

This series adds a new block layer API:

   void bdrv_aio_cancel_async(BlockDriverAIOCB *acb);

And use it to emulate bdrv_aio_cancel.

The function is similar to bdrv_aio_cancel in that it cancels an AIO request,
but differs in that it doesn't block until the request is completely cancelled
or done.

More importantly, the completion callback, BlockDriverAIOCB.cb, is guaranteed
to be called, so that the cb can take care of resource releasing and status
reporting to guest, etc.

In the following work, scsi emulation code will be shifted to use the async
cancelling.

One major benefit would be that when guest tries to cancel a request, where the
request cannot be cancelled easily, (due to throttled BlockDriverState, a lost
connection, or a large request queue), we don't need to block the whole vm with
a busy loop, which is how bdrv_aio_cancel is implemented now.


First, I think this series is really useful. However, I tested the v4
series and found that the virtio-scsi disk (scsi-hd) was still blocked when
the IO could not come back, because of
virtio_scsi_do_tmf->scsi_cancel_io->bdrv_aio_cancel. Can we just change
bdrv_aio_cancel to bdrv_aio_cancel_async to solve this problem?




A test case that is easy to reproduce is, throttle a scsi-disk to a very low
limit, for example 50 bps, then stress the guest block device with dd or fio.

Currently, the vm will quickly hang when it loses patience and sends a tmf
command to cancel the request, at which point we will busy wait in
bdrv_aio_cancel, until the request is slowly spit out from throttled_reqs.

Later, we will change scsi device code to make this asynchronous, on top of
bdrv_aio_cancel_async.

Fam


Fam Zheng (21):
   ide/ahci: Check for -ECANCELED in aio callbacks
   block: Add refcnt in BlockDriverAIOCB
   block: Add bdrv_aio_cancel_async
   block: Drop bdrv_em_co_aiocb_info.cancel
   block: Convert bdrv_em_aiocb_info.cancel to .cancel_async
   thread-pool: Convert thread_pool_aiocb_info.cancel to cancel_async
   linux-aio: Convert laio_aiocb_info.cancel to .cancel_async
   dma: Convert dma_aiocb_info.cancel to .cancel_async
   iscsi: Convert iscsi_aiocb_info.cancel to .cancel_async
   archipelago: Drop archipelago_aiocb_info.cancel
   blkdebug: Drop blkdebug_aiocb_info.cancel
   blkverify: Drop blkverify_aiocb_info.cancel
   curl: Drop curl_aiocb_info.cancel
   qed: Drop qed_aiocb_info.cancel
   quorum: Convert quorum_aiocb_info.cancel to .cancel_async
   rbd: Drop rbd_aiocb_info.cancel
   sheepdog: Convert sd_aiocb_info.cancel to .cancel_async
   win32-aio: Drop win32_aiocb_info.cancel
   ide: Convert trim_aiocb_info.cancel to .cancel_async
   block: Drop AIOCBInfo.cancel
   block: Rename qemu_aio_release -> qemu_aio_unref

Liu Yuan (1):
   quorum: fix quorum_aio_cancel()

  block.c  | 69 
  block/archipelago.c  | 19 ++---
  block/blkdebug.c | 17 ++--
  block/blkverify.c| 21 +--
  block/curl.c | 16 ---
  block/iscsi.c| 23 
  block/linux-aio.c| 34 +++-
  block/qed.c  | 23 +---
  block/quorum.c   | 11 
  block/rbd.c  | 25 ++
  block/sheepdog.c | 54 -
  block/win32-aio.c| 18 ++---
  dma-helpers.c| 20 +++---
  hw/ide/ahci.c|  3 +++
  hw/ide/core.c| 26 --
  include/block/aio.h  |  7 +++--
  include/block/block.h|  1 +
  tests/test-thread-pool.c | 34 ++--
  thread-pool.c| 36 +++--
  19 files changed, 172 insertions(+), 285 deletions(-)

Re: [Qemu-devel] [PATCH v7 RESEND 4/8] memory: add parameter errp to memory_region_init_rom_device

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 04:05, Hu Tao ha scritto:
>> > 
>> > Better not use error_abort if we can avoid it, and here it's particularly
>> > easy...
> Is error_abort deprecated?

No, not at all.  It is useful whenever you know that an error cannot
happen.  However, if it makes sense and it is easy, error propagation is
better.  For every patch today that removes an exit(1), tomorrow we
could have a patch that removes an &error_abort.

Paolo

[Qemu-devel] [PATCH v15 1/5] block: round up file size to nearest sector

2014-09-10 Thread Hu Tao

Currently the file size requested by the user is rounded down to the nearest
sector, so the actual file size can be a bit less than the size the
user requested. Since some formats (like qcow2) record the virtual disk
size in bytes, this can make the last few bytes inaccessible.

This patch fixes it by rounding up file size to nearest sector so that
the actual file size is no less than the requested file size.

Signed-off-by: Hu Tao 
Reviewed-by: Kevin Wolf 
Reviewed-by: Eric Blake 
Reviewed-by: Max Reitz 
---
 block/archipelago.c  |  3 ++-
 block/cow.c  |  3 ++-
 block/gluster.c  |  4 +--
 block/iscsi.c|  4 +--
 block/nfs.c  |  3 ++-
 block/qcow.c |  3 ++-
 block/qcow2.c|  3 ++-
 block/qed.c  |  3 ++-
 block/raw-posix.c|  8 +++---
 block/raw-win32.c|  4 +--
 block/rbd.c  |  3 ++-
 block/sheepdog.c |  3 ++-
 block/ssh.c  |  3 ++-
 block/vdi.c  |  3 ++-
 block/vhdx.c |  3 ++-
 block/vmdk.c |  3 ++-
 block/vpc.c  |  3 ++-
 tests/qemu-iotests/104   | 57 
 tests/qemu-iotests/104.out   | 12 +
 tests/qemu-iotests/common.filter | 21 +++
 tests/qemu-iotests/group |  1 +
 21 files changed, 127 insertions(+), 23 deletions(-)
 create mode 100755 tests/qemu-iotests/104
 create mode 100644 tests/qemu-iotests/104.out

diff --git a/block/archipelago.c b/block/archipelago.c
index 22a7daa..06c51f9 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -708,7 +708,8 @@ static int qemu_archipelago_create(const char *filename,
 
 parse_filename_opts(filename, errp, &volname, &segment_name, &mport,
 &vport);
-total_size = qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0);
+total_size = ROUND_UP(qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0),
+  BDRV_SECTOR_SIZE);
 
 if (segment_name == NULL) {
 segment_name = g_strdup("archipelago");
diff --git a/block/cow.c b/block/cow.c
index 6ee4833..c3769fe 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -335,7 +335,8 @@ static int cow_create(const char *filename, QemuOpts *opts, 
Error **errp)
 BlockDriverState *cow_bs = NULL;
 
 /* Read out options */
-image_sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+image_sectors = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 
0),
+ BDRV_SECTOR_SIZE);
 image_filename = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
 
 ret = bdrv_create_file(filename, opts, &local_err);
diff --git a/block/gluster.c b/block/gluster.c
index 1912cf9..65c7a58 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -494,8 +494,8 @@ static int qemu_gluster_create(const char *filename,
 goto out;
 }
 
-total_size =
-qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+  BDRV_SECTOR_SIZE);
 
 tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
 if (!tmp || !strcmp(tmp, "off")) {
diff --git a/block/iscsi.c b/block/iscsi.c
index 3e19202..84bcae8 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1531,8 +1531,8 @@ static int iscsi_create(const char *filename, QemuOpts 
*opts, Error **errp)
 bs = bdrv_new("", &error_abort);
 
 /* Read out options */
-total_size =
-qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+  BDRV_SECTOR_SIZE);
 bs->opaque = g_new0(struct IscsiLun, 1);
 iscsilun = bs->opaque;
 
diff --git a/block/nfs.c b/block/nfs.c
index 194f301..c76e368 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -418,7 +418,8 @@ static int nfs_file_create(const char *url, QemuOpts *opts, 
Error **errp)
 client->aio_context = qemu_get_aio_context();
 
 /* Read out options */
-total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+  BDRV_SECTOR_SIZE);
 
 ret = nfs_client_open(client, url, O_CREAT, errp);
 if (ret < 0) {
diff --git a/block/qcow.c b/block/qcow.c
index 67c237f..041af26 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -725,7 +725,8 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
 BlockDriverState *qcow_bs;
 
 /* Read out options */
-total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+  BDRV_SECTOR_SIZE);
 backing_file = qemu_opt_get_del(op

[Qemu-devel] [PATCH v15 5/5] qcow2: Add falloc and full preallocation option

2014-09-10 Thread Hu Tao

preallocation=falloc allocates disk space by posix_fallocate(),
preallocation=full allocates disk space by writing zeros to disk.
Both modes imply preallocation=metadata.

Signed-off-by: Hu Tao 
Reviewed-by: Max Reitz 
---
 block/qcow2.c  | 63 ++
 qemu-doc.texi  |  8 +++---
 qemu-img.texi  |  8 +++---
 tests/qemu-iotests/082.out | 54 +++
 4 files changed, 90 insertions(+), 43 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 2d68b51..0daf25c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1772,6 +1772,56 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
 Error *local_err = NULL;
 int ret;
 
+if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
+int64_t meta_size = 0;
+uint64_t nreftablee, nrefblocke, nl1e, nl2e;
+int64_t aligned_total_size = align_offset(total_size, cluster_size);
+
+/* header: 1 cluster */
+meta_size += cluster_size;
+
+/* total size of L2 tables */
+nl2e = aligned_total_size / cluster_size;
+nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
+meta_size += nl2e * sizeof(uint64_t);
+
+/* total size of L1 tables */
+nl1e = nl2e * sizeof(uint64_t) / cluster_size;
+nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t));
+meta_size += nl1e * sizeof(uint64_t);
+
+/* total size of refcount blocks
+ *
+ * note: every host cluster is reference-counted, including metadata
+ * (even refcount blocks are recursively included).
+ * Let:
+ *   a = total_size (this is the guest disk size)
+ *   m = meta size not including refcount blocks and refcount tables
+ *   c = cluster size
+ *   y1 = number of refcount blocks entries
+ *   y2 = meta size including everything
+ * then,
+ *   y1 = (y2 + a)/c
+ *   y2 = y1 * sizeof(u16) + y1 * sizeof(u16) * sizeof(u64) / c + m
+ * we can get y1:
+ *   y1 = (a + m) / (c - sizeof(u16) - sizeof(u16) * sizeof(u64) / c)
+ */
+nrefblocke = (aligned_total_size + meta_size + cluster_size) /
+(cluster_size - sizeof(uint16_t) -
+ 1.0 * sizeof(uint16_t) * sizeof(uint64_t) / cluster_size);
+nrefblocke = align_offset(nrefblocke, cluster_size / sizeof(uint16_t));
+meta_size += nrefblocke * sizeof(uint16_t);
+
+/* total size of refcount tables */
+nreftablee = nrefblocke * sizeof(uint16_t) / cluster_size;
+nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
+meta_size += nreftablee * sizeof(uint64_t);
+
+qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
+aligned_total_size + meta_size);
+qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc]);
+}
+
 ret = bdrv_create_file(filename, opts, &local_err);
 if (ret < 0) {
 error_propagate(errp, local_err);
@@ -1877,7 +1927,7 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
 }
 
 /* And if we're supposed to preallocate metadata, do that now */
-if (prealloc == PREALLOC_MODE_METADATA) {
+if (prealloc != PREALLOC_MODE_OFF) {
 BDRVQcowState *s = bs->opaque;
 qemu_co_mutex_lock(&s->lock);
 ret = preallocate(bs);
@@ -1958,14 +2008,6 @@ static int qcow2_create(const char *filename, QemuOpts 
*opts, Error **errp)
 flags |= BLOCK_FLAG_LAZY_REFCOUNTS;
 }
 
-if (prealloc != PREALLOC_MODE_OFF &&
-prealloc != PREALLOC_MODE_METADATA) {
-ret = -EINVAL;
-error_setg(errp, "Unsupported preallocate mode: %s",
-   PreallocMode_lookup[prealloc]);
-goto finish;
-}
-
 if (backing_file && prealloc != PREALLOC_MODE_OFF) {
 error_setg(errp, "Backing file and preallocation cannot be used at "
"the same time");
@@ -2526,7 +2568,8 @@ static QemuOptsList qcow2_create_opts = {
 {
 .name = BLOCK_OPT_PREALLOC,
 .type = QEMU_OPT_STRING,
-.help = "Preallocation mode (allowed values: off, metadata)"
+.help = "Preallocation mode (allowed values: off, metadata, "
+"falloc, full)"
 },
 {
 .name = BLOCK_OPT_LAZY_REFCOUNTS,
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 1f289d6..ef3be72 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -584,9 +584,11 @@ sizes can improve the image file size whereas larger 
cluster sizes generally
 provide better performance.
 
 @item preallocation
-Preallocation mode (allowed values: off, metadata). An image with preallocated
-metadata is initially larger but can improve performance when the image needs
-to grow.
+Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc},

[Qemu-devel] [PATCH v15 4/5] raw-posix: Add falloc and full preallocation option

2014-09-10 Thread Hu Tao

This patch adds a new option preallocation for raw format, and implements
falloc and full preallocation.

Signed-off-by: Hu Tao 
Reviewed-by: Max Reitz 
---
 block/raw-posix.c | 92 +++
 qemu-doc.texi |  9 ++
 qemu-img.texi |  9 ++
 3 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 7208c05..a253697 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -30,6 +30,7 @@
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
 #include "raw-aio.h"
+#include "qapi/util.h"
 
 #if defined(__APPLE__) && (__MACH__)
 #include 
@@ -1365,6 +1366,9 @@ static int raw_create(const char *filename, QemuOpts 
*opts, Error **errp)
 int result = 0;
 int64_t total_size = 0;
 bool nocow = false;
+PreallocMode prealloc;
+char *buf = NULL;
+Error *local_err = NULL;
 
 strstart(filename, "file:", &filename);
 
@@ -1372,37 +1376,82 @@ static int raw_create(const char *filename, QemuOpts 
*opts, Error **errp)
 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
   BDRV_SECTOR_SIZE);
 nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
+buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
+prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
+   PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+   &local_err);
+g_free(buf);
+if (local_err) {
+error_propagate(errp, local_err);
+result = -EINVAL;
+goto out;
+}
 
 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
 if (fd < 0) {
 result = -errno;
 error_setg_errno(errp, -result, "Could not create file");
-} else {
-if (nocow) {
+goto out;
+}
+
+if (nocow) {
 #ifdef __linux__
-/* Set NOCOW flag to solve performance issue on fs like btrfs.
- * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
- * will be ignored since any failure of this operation should not
- * block the left work.
- */
-int attr;
-if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
-attr |= FS_NOCOW_FL;
-ioctl(fd, FS_IOC_SETFLAGS, &attr);
-}
-#endif
+/* Set NOCOW flag to solve performance issue on fs like btrfs.
+ * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
+ * will be ignored since any failure of this operation should not
+ * block the left work.
+ */
+int attr;
+if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+attr |= FS_NOCOW_FL;
+ioctl(fd, FS_IOC_SETFLAGS, &attr);
 }
+#endif
+}
+
+if (ftruncate(fd, total_size) != 0) {
+result = -errno;
+error_setg_errno(errp, -result, "Could not resize file");
+goto out_close;
+}
 
-if (ftruncate(fd, total_size) != 0) {
-result = -errno;
-error_setg_errno(errp, -result, "Could not resize file");
+if (prealloc == PREALLOC_MODE_FALLOC) {
+/* posix_fallocate() doesn't set errno. */
+result = -posix_fallocate(fd, 0, total_size);
+if (result != 0) {
+error_setg_errno(errp, -result,
+ "Could not preallocate data for the new file");
 }
-if (qemu_close(fd) != 0) {
-result = -errno;
-error_setg_errno(errp, -result, "Could not close the new file");
+} else if (prealloc == PREALLOC_MODE_FULL) {
+buf = g_malloc0(65536);
+int64_t num = 0, left = total_size;
+
+while (left > 0) {
+num = MIN(left, 65536);
+result = write(fd, buf, num);
+if (result < 0) {
+result = -errno;
+error_setg_errno(errp, -result,
+ "Could not write to the new file");
+break;
+}
+left -= num;
 }
+fsync(fd);
+g_free(buf);
+} else if (prealloc != PREALLOC_MODE_OFF) {
+result = -EINVAL;
+error_setg(errp, "Unsupported preallocation mode: %s",
+   PreallocMode_lookup[prealloc]);
 }
+
+out_close:
+if (qemu_close(fd) != 0 && result == 0) {
+result = -errno;
+error_setg_errno(errp, -result, "Could not close the new file");
+}
+out:
 return result;
 }
 
@@ -1585,6 +1634,11 @@ static QemuOptsList raw_create_opts = {
 .type = QEMU_OPT_BOOL,
 .help = "Turn off copy-on-write (valid only on btrfs)"
 },
+{
+.name = BLOCK_OPT_PREALLOC,
+.type = QEMU_OPT_STRING,
+.help = "Preallocation mode (allowed values: off, falloc, full)"
+},
 { /* end of list */ }
 }
 };
diff --git a/qemu-doc.texi b/qemu-do

Re: [Qemu-devel] [PATCH v3] dump: let dump_error return error info to caller

2014-09-10 Thread zhanghailiang


On 2014/9/9 23:10, Eric Blake wrote:

On 09/03/2014 03:54 AM, zhanghailiang wrote:

The second parameter of dump_error is unused, but one purpose of
using this function is to report the error info.

Use error_set to return the error info to the caller.

Signed-off-by: zhanghailiang
---
  V3:
- Drop the '\n' in the message when calling dump_error (comment from Eric Blake)
  V2:
- Return the error reason to the caller, as suggested by Luiz Capitulino.
---
  dump.c | 165 -
  1 file changed, 82 insertions(+), 83 deletions(-)

diff --git a/dump.c b/dump.c
index 71d3e94..a08a711 100644
--- a/dump.c
+++ b/dump.c
@@ -81,9 +81,10 @@ static int dump_cleanup(DumpState *s)
  return 0;
  }

-static void dump_error(DumpState *s, const char *reason)
+static void dump_error(DumpState *s, Error **errp, const char *reason)


I still think it is unusual to list the errp argument in the middle,
instead of the end.  But not necessarily a show-stopper.



Good point, I will adjust its position to the end ;-)




-static int write_elf64_header(DumpState *s)
+static int write_elf64_header(DumpState *s, Error **errp)
  {
  Elf64_Ehdr elf_header;
  int ret;
@@ -126,14 +127,14 @@ static int write_elf64_header(DumpState *s)

  ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
  if (ret<  0) {
-dump_error(s, "dump: failed to write elf header.\n");
+dump_error(s, errp, "dump: failed to write elf header.");


We tend to avoid trailing '.' in error messages



OK, I will remove them.




+static int write_dump_pages(DumpState *s, Error **errp)
  {
  int ret = 0;
  DataCache page_desc, page_data;
@@ -1241,7 +1244,7 @@ static int write_dump_pages(DumpState *s)
  ret = write_cache(&page_data, buf, TARGET_PAGE_SIZE, false);
  g_free(buf);
  if (ret<  0) {
-dump_error(s, "dump: failed to write page data(zero page).\n");
+dump_error(s, errp, "dump: failed to write page data(zero page).");


Pre-existing, but worth fixing: space before () in English sentences.



OK, will fix that, Thanks!

[Qemu-devel] [PATCH v15 0/5] qcow2, raw: add preallocation=full and preallocation=falloc

2014-09-10 Thread Hu Tao

This series adds two preallocation modes to qcow2 and raw:

Option preallocation=full preallocates disk space for the image by writing
zeros to disk; this ensures the disk space is available in all cases.

Option preallocation=falloc preallocates disk space by calling
posix_fallocate(). This is faster than preallocation=full.

Note: there is a false positive reported by checkpatch.pl to patch 1.

changes to v14:

  - add detailed commit message to patch 1
  - change the coding style as Eric and Benoît suggested (patch 3)
  - use break as Benoît suggested (patch 4)

changes to v13:

  - rebase (patch 3 in v13 is already in)
  - don't convert file size to sector size in hdev_create(), too (patch 2)
  - reintroduce preallocation=falloc. (patch 3)
  - split the implementation of preallocation=full in v13 into
preallocation=falloc and preallocation=full (patch 4)

changes to v12:

  - remove dependence on minimal_blob_size() (patch 6)
  - remove preallocation=falloc. (patch 4)
  - preallocation=full tries posix_fallocate() first then writing
zeros (patch 5)
  - round up file size for all formats (patch 1)
  - avoid converting file size for more formats (patch 2)

changes to v11:

 - fix test case 049 (patch 4)
 - unsigned nl2e -> uint64_t nl2e (patch 6)
 - use >> instead of / (patch 6)

changes to v10:

  - PreallocMode is moved from file qapi-schema.json to qapi/block-core.json
  - introduces preallocation=falloc, no changes to preallocation=metadata
  - using minimal_blob_size() to calculate metadata size for qcow2
  - indentation fix in file blockdev.c

changes to v9:

 - use ROUND_UP to do round up
 - split the round up into its own patch and add test case
 - new patch rename parse_enum_option to qapi_enum_parse and make it public
 - reuse qapi_enum_parse

changes to v8:

 - round up image file size to nearest sector size
 - dont' blindly lose error info
 - target for 2.1 rather than 2.0
 - and, rebase to latest git tree

changes to v5:

  - add `Since 2.0' to PreallocMode
  - apply total_size change to raw-win32.c as well

changes to v4:

  - fix wrong calculation of qcow2 metadata size in v4
  - remove raw_preallocate2()
  - better error out path in raw_create()
  - fix coding style

changes to v3:

  - remove bdrv_preallocate and make preallocation a
bdrv_create_file option
  - prealloc_mode -> PreallocMode and add it to QAPI
  - fix return value in raw_preallocate2

changes to v2:

  - Fix comments to v2 by Fam.
  - qcow2: first fallocate disk space, then allocate metadata. This avoids
the problem in v2 that bdrv_preallocate may clear all information in
metadata. This does not necessarily map all data clusters sequentially
but does keep information in metadata. Peter, is this acceptable?


Hu Tao (5):
  block: round up file size to nearest sector
  block: don't convert file size to sector size
  qapi: introduce PreallocMode and new PreallocModes full and falloc.
  raw-posix: Add falloc and full preallocation option
  qcow2: Add falloc and full preallocation option

 block/archipelago.c  |   3 +-
 block/cow.c  |   3 +-
 block/gluster.c  |   9 ++--
 block/iscsi.c|   4 +-
 block/nfs.c  |   3 +-
 block/qcow.c |   7 +--
 block/qcow2.c|  82 +--
 block/qed.c  |   3 +-
 block/raw-posix.c| 102 ++-
 block/raw-win32.c|   6 +--
 block/rbd.c  |   3 +-
 block/sheepdog.c |   3 +-
 block/ssh.c  |   3 +-
 block/vdi.c  |   3 +-
 block/vhdx.c |   3 +-
 block/vmdk.c |   3 +-
 block/vpc.c  |   3 +-
 qapi/block-core.json |  17 +++
 qemu-doc.texi|  17 +--
 qemu-img.texi|  17 +--
 tests/qemu-iotests/049.out   |   2 +-
 tests/qemu-iotests/082.out   |  54 ++---
 tests/qemu-iotests/104   |  57 ++
 tests/qemu-iotests/104.out   |  12 +
 tests/qemu-iotests/common.filter |  21 
 tests/qemu-iotests/group |   1 +
 26 files changed, 344 insertions(+), 97 deletions(-)
 create mode 100755 tests/qemu-iotests/104
 create mode 100644 tests/qemu-iotests/104.out

-- 
1.9.3

[Qemu-devel] [PATCH v15 3/5] qapi: introduce PreallocMode and new PreallocModes full and falloc.

2014-09-10 Thread Hu Tao

This patch prepares for the subsequent patches.

Signed-off-by: Hu Tao 
Reviewed-by: Max Reitz 
Reviewed-by: Kevin Wolf 
Reviewed-by: Eric Blake 
---
 block/qcow2.c  | 28 ++--
 qapi/block-core.json   | 17 +
 tests/qemu-iotests/049.out |  2 +-
 3 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index cf27c3f..2d68b51 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -30,6 +30,7 @@
 #include "qemu/error-report.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qbool.h"
+#include "qapi/util.h"
 #include "trace.h"
 #include "qemu/option_int.h"
 
@@ -1738,7 +1739,7 @@ static int preallocate(BlockDriverState *bs)
 
 static int qcow2_create2(const char *filename, int64_t total_size,
  const char *backing_file, const char *backing_format,
- int flags, size_t cluster_size, int prealloc,
+ int flags, size_t cluster_size, PreallocMode prealloc,
  QemuOpts *opts, int version,
  Error **errp)
 {
@@ -1876,7 +1877,7 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
 }
 
 /* And if we're supposed to preallocate metadata, do that now */
-if (prealloc) {
+if (prealloc == PREALLOC_MODE_METADATA) {
 BDRVQcowState *s = bs->opaque;
 qemu_co_mutex_lock(&s->lock);
 ret = preallocate(bs);
@@ -1915,7 +1916,7 @@ static int qcow2_create(const char *filename, QemuOpts 
*opts, Error **errp)
 uint64_t size = 0;
 int flags = 0;
 size_t cluster_size = DEFAULT_CLUSTER_SIZE;
-int prealloc = 0;
+PreallocMode prealloc;
 int version = 3;
 Error *local_err = NULL;
 int ret;
@@ -1931,12 +1932,11 @@ static int qcow2_create(const char *filename, QemuOpts 
*opts, Error **errp)
 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
  DEFAULT_CLUSTER_SIZE);
 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-if (!buf || !strcmp(buf, "off")) {
-prealloc = 0;
-} else if (!strcmp(buf, "metadata")) {
-prealloc = 1;
-} else {
-error_setg(errp, "Invalid preallocation mode: '%s'", buf);
+prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
+   PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+   &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
 ret = -EINVAL;
 goto finish;
 }
@@ -1958,7 +1958,15 @@ static int qcow2_create(const char *filename, QemuOpts 
*opts, Error **errp)
 flags |= BLOCK_FLAG_LAZY_REFCOUNTS;
 }
 
-if (backing_file && prealloc) {
+if (prealloc != PREALLOC_MODE_OFF &&
+prealloc != PREALLOC_MODE_METADATA) {
+ret = -EINVAL;
+error_setg(errp, "Unsupported preallocate mode: %s",
+   PreallocMode_lookup[prealloc]);
+goto finish;
+}
+
+if (backing_file && prealloc != PREALLOC_MODE_OFF) {
 error_setg(errp, "Backing file and preallocation cannot be used at "
"the same time");
 ret = -EINVAL;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 68945c2..fe6e025 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1703,3 +1703,20 @@
 'len'   : 'int',
 'offset': 'int',
 'speed' : 'int' } }
+
+# @PreallocMode
+#
+# Preallocation mode of QEMU image file
+#
+# @off: no preallocation
+# @metadata: preallocate only for metadata
+# @falloc: like @full preallocation but allocate disk space by
+#  posix_fallocate() rather than writing zeros.
+# @full: preallocate all data by writing zeros to device to ensure disk
+#space is really available. @full preallocation also sets up
+#metadata correctly.
+#
+# Since 2.2
+##
+{ 'enum': 'PreallocMode',
+  'data': [ 'off', 'metadata', 'falloc', 'full' ] }
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 71ca44d..09ca0ae 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -179,7 +179,7 @@ qemu-img create -f qcow2 -o preallocation=metadata 
TEST_DIR/t.qcow2 64M
 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off 
cluster_size=65536 preallocation='metadata' lazy_refcounts=off 
 
 qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M
-qemu-img: TEST_DIR/t.qcow2: Invalid preallocation mode: '1234'
+qemu-img: TEST_DIR/t.qcow2: invalid parameter value: 1234
 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off 
cluster_size=65536 preallocation='1234' lazy_refcounts=off 
 
 == Check encryption option ==
-- 
1.9.3

[Qemu-devel] [PATCH v15 2/5] block: don't convert file size to sector size

2014-09-10 Thread Hu Tao

and avoid converting it back later.

Signed-off-by: Hu Tao 
Reviewed-by: Max Reitz 
Reviewed-by: Benoît Canet 
---
 block/gluster.c   |  9 -
 block/qcow.c  |  8 
 block/qcow2.c | 10 +-
 block/raw-posix.c | 12 ++--
 block/raw-win32.c |  6 +++---
 5 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 65c7a58..1eb3a8c 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -494,8 +494,8 @@ static int qemu_gluster_create(const char *filename,
 goto out;
 }
 
-total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-  BDRV_SECTOR_SIZE);
+total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+  BDRV_SECTOR_SIZE);
 
 tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
 if (!tmp || !strcmp(tmp, "off")) {
@@ -516,9 +516,8 @@ static int qemu_gluster_create(const char *filename,
 if (!fd) {
 ret = -errno;
 } else {
-if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
-if (prealloc && qemu_gluster_zerofill(fd, 0,
-total_size * BDRV_SECTOR_SIZE)) {
+if (!glfs_ftruncate(fd, total_size)) {
+if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
 ret = -errno;
 }
 } else {
diff --git a/block/qcow.c b/block/qcow.c
index 041af26..a87bd69 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -725,8 +725,8 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
 BlockDriverState *qcow_bs;
 
 /* Read out options */
-total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-  BDRV_SECTOR_SIZE);
+total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+  BDRV_SECTOR_SIZE);
 backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
 if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
 flags |= BLOCK_FLAG_ENCRYPT;
@@ -754,7 +754,7 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
 memset(&header, 0, sizeof(header));
 header.magic = cpu_to_be32(QCOW_MAGIC);
 header.version = cpu_to_be32(QCOW_VERSION);
-header.size = cpu_to_be64(total_size * 512);
+header.size = cpu_to_be64(total_size);
 header_size = sizeof(header);
 backing_filename_len = 0;
 if (backing_file) {
@@ -776,7 +776,7 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
 }
 header_size = (header_size + 7) & ~7;
 shift = header.cluster_bits + header.l2_bits;
-l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
+l1_size = (total_size + (1LL << shift) - 1) >> shift;
 
 header.l1_table_offset = cpu_to_be64(header_size);
 if (flags & BLOCK_FLAG_ENCRYPT) {
diff --git a/block/qcow2.c b/block/qcow2.c
index c8050e5..cf27c3f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1859,7 +1859,7 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
 }
 
 /* Okay, now that we have a valid image, let's give it the right size */
-ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
+ret = bdrv_truncate(bs, total_size);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not resize image");
 goto out;
@@ -1912,7 +1912,7 @@ static int qcow2_create(const char *filename, QemuOpts 
*opts, Error **errp)
 char *backing_file = NULL;
 char *backing_fmt = NULL;
 char *buf = NULL;
-uint64_t sectors = 0;
+uint64_t size = 0;
 int flags = 0;
 size_t cluster_size = DEFAULT_CLUSTER_SIZE;
 int prealloc = 0;
@@ -1921,8 +1921,8 @@ static int qcow2_create(const char *filename, QemuOpts 
*opts, Error **errp)
 int ret;
 
 /* Read out options */
-sectors = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-   BDRV_SECTOR_SIZE);
+size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+BDRV_SECTOR_SIZE);
 backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
 backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
 if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
@@ -1972,7 +1972,7 @@ static int qcow2_create(const char *filename, QemuOpts 
*opts, Error **errp)
 goto finish;
 }
 
-ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
+ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
 cluster_size, prealloc, opts, version, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 9c22e3f..7208c05 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1369,8 +1369,8 @@ static int raw_create(const char *filename, QemuOpts 
*opts, Error **errp)
 strstart(filename, "

Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/3] target-ppc : Add floating point ability to 440x5 PPC CPU

2014-09-10 Thread Alexander Graf



On 10.09.14 07:03, Pierre Mallard wrote:
> This patch adds some floating point operations for PPC440x5.
> Compile with PPC440x5_HAVE_FPU enabled in configure extra-cflags
> 
> Signed-off-by: Pierre Mallard 

Instead of the define, could we just create a new CPU that has these
flags enabled? Just call it "440x5-fpu" or so.


Alex

> ---
>  target-ppc/translate_init.c |4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 48177ed..b4dedce 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -3897,6 +3897,10 @@ POWERPC_FAMILY(440x5)(ObjectClass *oc, void *data)
>  pcc->init_proc = init_proc_440x5;
>  pcc->check_pow = check_pow_nocheck;
>  pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING |
> +#ifdef PPC440x5_HAVE_FPU
> +   PPC_FLOAT | PPC_FLOAT_FSQRT | 
> +   PPC_FLOAT_STFIWX |
> +#endif
> PPC_DCR | PPC_WRTEE | PPC_RFMCI |
> PPC_CACHE | PPC_CACHE_ICBI |
> PPC_CACHE_DCBZ | PPC_CACHE_DCBA |
>

Re: [Qemu-devel] [PATCH 1/6] hw/arm/virt: Provide flash devices for boot ROMs

2014-09-10 Thread Peter Maydell

On 10 September 2014 10:09, Ard Biesheuvel  wrote:
> On 9 September 2014 20:20, Peter Maydell  wrote:
>> There were previously questions about whether we should
>> have flash or RAM at the bottom, but I think it makes
>> sense just to have a "like vexpress" config with two
>> flash devices. This does make telling QEMU about backing
>> storage for the 2nd flash a little complicated, but I
>> think anybody seriously running a config like that will
>> be using the management tools layer anyhow.

> You mean having to use -pflash and pad the images out to 64 MB? I
> wouldn't worry about that.

More particularly that if you don't want to provide backing
storage for the first flash but only the second, you can't just
use pflash but have to use the longer -drive options.

-- PMM

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Nikunj A Dadhania

Greg Kurz  writes:

>> > > I did some debugging: it looks like the guest kernel calls the OF
>> > > quiesce call to flush pending DMA and disables bus master on the
>> > > virtio-blk device (PCI_COMMAND == 0x3).
>> > 
>> > Getting confused, above you are talking about virtio-net and here it is
>> > virtio-blk.
>> > 
>> > Anyways, the routines still remain the same for both of them.  From SLOF
>> > during init we set DRIVER_OK, and after using the device during the
>> > quiesce, called from linux kernel VIRTIO_CONFIG_S_FAILED is set and then
>> > a VIRTIO_DEVICE_RESET is done.
>> 
>> BTW, you really should start enabling bus mastering, avoid relying
>> on the work-around we have for broken guests.
>> 
>
> FWIW during my debug session, I see that SLOF enables bus mastering...
> unfortunately, it gets disabled at some point after the guest kernel
> is started (around the ppc64 prom_init() call).

Is it before quiesce call?

Regards
Nikunj

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Nikunj A Dadhania

"Michael S. Tsirkin"  writes:

> On Wed, Sep 10, 2014 at 01:44:49PM +0530, Nikunj A Dadhania wrote:
>> Greg Kurz  writes:
>> 
>> > On Mon, 8 Sep 2014 19:05:02 +0300
>> > "Michael S. Tsirkin"  wrote:
>> >
>> >> commit cc943c36faa192cd4b32af8fe5edb31894017d35
>> >> pci: Use bus master address space for delivering MSI/MSI-X messages
>> >> breaks virtio-net for rhel6.[56] x86 guests because they don't
>> >> enable bus mastering for virtio PCI devices
>> >> 
>> >> Old guests forgot to enable bus mastering, enable it
>> >> automatically on DRIVER_OK.
>> >> 
>> >> Note: we should either back out the original patch from
>> >> stable or apply this one on top.
>> >> 
>> >> Cc: qemu-sta...@nongnu.org
>> >> Reported-by: Greg Kurz 
>> >> Signed-off-by: Jan Kiszka 
>> >> Signed-off-by: Michael S. Tsirkin 
>> >> ---
>> >>  hw/virtio/virtio-pci.c | 2 ++
>> >>  1 file changed, 2 insertions(+)
>> >> 
>> >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> >> index ddb5da1..af937d2 100644
>> >> --- a/hw/virtio/virtio-pci.c
>> >> +++ b/hw/virtio/virtio-pci.c
>> >> @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, 
>> >> uint32_t addr, uint32_t val)
>> >>  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
>> >>  !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
>> >>  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
>> >> +
>> >> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
>> >> +  true);
>> >>  }
>> >>  break;
>> >>  case VIRTIO_MSI_CONFIG_VECTOR:
>> >
>> > Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel expertise.
>> >
>> > Michael,
>> >
>> > This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
>> > Unfortunately,
>> > this fails for rhel6.5 ppc64 because it is never called... 
>> 
>> > I did some debugging: it looks like the guest kernel calls the OF
>> > quiesce call to flush pending DMA and disables bus master on the
>> > virtio-blk device (PCI_COMMAND == 0x3).
>> 
>> Getting confused, above you are talking about virtio-net and here it is
>> virtio-blk.
>> 
>> Anyways, the routines still remain the same for both of them.  From SLOF
>> during init we set DRIVER_OK, and after using the device during the
>> quiesce, called from linux kernel VIRTIO_CONFIG_S_FAILED is set and then
>> a VIRTIO_DEVICE_RESET is done.
>
> BTW, you really should start enabling bus mastering, avoid relying
> on the work-around we have for broken guests.

In SLOF, we do enable PCI MASTER during device scanning and then later
disable it.

Regards
Nikunj

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 11:05:39AM +0200, Paolo Bonzini wrote:
> Il 10/09/2014 10:51, Peter Maydell ha scritto:
> > > What is not okay (and I think it should be a rule) is to touch other
> > > devices from post_load, unless you know that they are deserialized
> > > first.  For example it's okay for a PCI device to talk to the parent
> > > bridge in its post_load function.
> > 
> > I don't think it's right to talk to another device even if you do
> > know it's deserialized first. Talking to it might make it change
> > its state, which would be wrong (since its correct state is
> > the state it's just deserialized). I would suggest the rule should
> > be "never do something that can change the state of another
> > device in post-load".
> 
> That's harder to do, but if it is possible to do it, it would be great
> as well.
> 
> It would not surprise me to find a case where the parent device actually
> _expects_ the children's post_load to inform it about something, instead
> of serializing that part of state on its own.
> 
> Paolo

Absolutely, I don't think we can require that.

For example, at the moment, for PCI bridges, we serialize the
state of all interrupt lines, but that's just a function
of all devices connected to each line.
So we are transmitting redundant information, and I have plans
to discard that and recompute parent state based on child state.



> > (We have similar issues with reset, except worse in that we
> > don't have a coherent rule to cause everything to come out
> > of reset in the right state.)

Re: [Qemu-devel] [Qemu-ppc] [PATCH 0/3] Enabling floating point instruction to 440x5 CPUs

2014-09-10 Thread Alexander Graf

On 10.09.14 07:03, Pierre Mallard wrote:
> This patch series enables floating point instructions in 440x5 CPUs 
> which have the capabilities to have optional APU FPU.
> 
> 1) Add floating point standard insns flag to 440x5 in case there is an apu 
> fpu.
> 2) Define a new floating point insns flag for operation 
> previously reserved to 64 bits proc (fcfid, fctid, fctidz)
> 3) Apply this new flag to fcfid, fctid, fctidz and move TARGET_PPC64 
> restrictions

I've looked through the patches mostly from a stylistic point of view.
As for whether the changes are technically correct and fully adhere to
the specs, I haven't verified anything and would leave that part to Tom :).

Alex

Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/3] target-ppc : Add PPC_FLOAT_64 flag to instructions type

2014-09-10 Thread Alexander Graf



On 10.09.14 07:03, Pierre Mallard wrote:
> This patch declares a new floating point instruction flag PPC_FLOAT_64 to be 
> used
> by fcfid, fctid[z] operations. Note that due to the limited number of bits, 
> FSEL and FRES points now to same value, and PPC_FLOAT_64 to former FSEL 
> value. 
> (There seems to be no case where FSEL and FRES are not used together at the 
> moment)
> 
> Signed-off-by: Pierre Mallard 
> ---
>  target-ppc/cpu.h|7 +--
>  target-ppc/translate_init.c |2 +-
>  2 files changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index b64c652..b5b3912 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1868,9 +1868,12 @@ enum {
>  PPC_FLOAT_FRES = 0x0008ULL,
>  PPC_FLOAT_FRSQRTE  = 0x0010ULL,
>  PPC_FLOAT_FRSQRTES = 0x0020ULL,
> -PPC_FLOAT_FSEL = 0x0040ULL,
> +PPC_FLOAT_FSEL = 0x0008ULL,
>  PPC_FLOAT_STFIWX   = 0x0080ULL,
>  
> +/* Use for PPC with double precision fpu */
> +PPC_FLOAT_64   = 0x0040ULL,

Please keep the list sorted by the bit number. Also I think we're better
off not having the same bit used for 2 enums. Just keep PPC_FLOAT_FRES
and make FSEL depend on the FRES bit in translate.c

> +
>  /* Vector/SIMD extensions
> */
>  /*   Altivec support 
> */
>  PPC_ALTIVEC= 0x0100ULL,
> @@ -1957,7 +1960,7 @@ enum {
>  | PPC_STRING | PPC_FLOAT | PPC_FLOAT_EXT \
>  | PPC_FLOAT_FSQRT | PPC_FLOAT_FRES \
>  | PPC_FLOAT_FRSQRTE | PPC_FLOAT_FRSQRTES \
> -| PPC_FLOAT_FSEL | PPC_FLOAT_STFIWX \
> +| PPC_FLOAT_FSEL | PPC_FLOAT_STFIWX | PPC_FLOAT_64 \
>  | PPC_ALTIVEC | PPC_SPE | PPC_SPE_SINGLE \
>  | PPC_SPE_DOUBLE | PPC_MEM_TLBIA \
>  | PPC_MEM_TLBIE | PPC_MEM_TLBSYNC \
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index b4dedce..073bef1 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -3899,7 +3899,7 @@ POWERPC_FAMILY(440x5)(ObjectClass *oc, void *data)
>  pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING |
>  #ifdef PPC440x5_HAVE_FPU
> PPC_FLOAT | PPC_FLOAT_FSQRT | 
> -   PPC_FLOAT_STFIWX |
> +   PPC_FLOAT_STFIWX | PPC_FLOAT_64 |
>  #endif
> PPC_DCR | PPC_WRTEE | PPC_RFMCI |
> PPC_CACHE | PPC_CACHE_ICBI |
>

Re: [Qemu-devel] [PATCH v5 00/22] block: Asynchronous request cancellation

2014-09-10 Thread Fam Zheng

On Wed, 09/10 17:09, Bin Wu wrote:
> On 2014/9/10 13:59, Fam Zheng wrote:
> >v5: Fix IDE callback. (Paolo)
> > Fix blkdebug. (Paolo)
> > Drop the DMA fix which is independent of this series. (Paolo)
> > Incorporate Yuan's patch on quorum_aio_cancel. (Benoît)
> > Commit message wording fix. (Benoît)
> > Rename qemu_aio_release to qemu_aio_unref. (Benoît)
> >
> >v4: Drop AIOCBInfo.cancel.
> >
> >This series adds a new block layer API:
> >
> >   void bdrv_aio_cancel_async(BlockDriverAIOCB *acb);
> >
> >And use it to emulate bdrv_aio_cancel.
> >
> >The function is similar to bdrv_aio_cancel in that it cancels an AIO request,
> >but different in that it doesn't block until the request is completely cancelled
> >or done.
> >
> >More importantly, the completion callback, BlockDriverAIOCB.cb, is guaranteed
> >to be called, so that the cb can take care of resource releasing and status
> >reporting to guest, etc.
> >
> >In the following work, scsi emulation code will be shifted to use the async
> >cancelling.
> >
> >One major benefit would be that when guest tries to cancel a request, where 
> >the
> >request cannot be cancelled easily, (due to throttled BlockDriverState, a 
> >lost
> >connection, or a large request queue), we don't need to block the whole vm 
> >with
> >a busy loop, which is how bdrv_aio_cancel is implemented now.
> 
> First, I think this series is really useful. However, I tested the v4 series
> and found virtio-scsi disk(scsi-hd) was still blocked when the IO could not
> come back because of virtio_scsi_do_tmf->scsi_cancel_io->bdrv_aio_cancel.
> can we just change the bdrv_aio_cancel to bdrv_aio_cancel_async to solve
> this problem?

We can't just change it yet. We need to take care of the scsi_req_unref there,
if we unref right after bdrv_aio_cancel_async, in the cb it will access
dangling pointer.

It's not trivial enough to do in this series, I will work on that on top, as
said below. For now, the user visible behavior of io cancellation is still the
same.

Thanks for testing.
Fam

> >
> >Later, we will change scsi device code to make this asynchronous, on top of
> >bdrv_aio_cancel_async.

Re: [Qemu-devel] [Qemu-ppc] [PATCH 3/3] target-ppc : Add PPC_FLOAT_64 type to fctid, fctidz and fcfid and remove their TARGET_PPC64 restriction

2014-09-10 Thread Alexander Graf



On 10.09.14 07:03, Pierre Mallard wrote:
> Apply the new PPC_FLOAT_64 flag to fctid[z] and fcfid. 
> May also be applied to fctidu[z] and fcfid[su][z], but since they are not 
> mentioned in xilinx documentation it might not be needed yet.
> 
> Signed-off-by: Pierre Mallard 
> ---
>  target-ppc/fpu_helper.c |7 +++
>  target-ppc/helper.h |6 --
>  target-ppc/translate.c  |   20 
>  3 files changed, 19 insertions(+), 14 deletions(-)
> 
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index da93d12..4e0e9e2 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -649,15 +649,13 @@ FPU_FCTI(fctiw, int32, 0x8000U)
>  FPU_FCTI(fctiwz, int32_round_to_zero, 0x8000U)
>  FPU_FCTI(fctiwu, uint32, 0xU)
>  FPU_FCTI(fctiwuz, uint32_round_to_zero, 0xU)
> -#if defined(TARGET_PPC64)
>  FPU_FCTI(fctid, int64, 0x8000ULL)
>  FPU_FCTI(fctidz, int64_round_to_zero, 0x8000ULL)
> +#if defined(TARGET_PPC64)
>  FPU_FCTI(fctidu, uint64, 0xULL)
>  FPU_FCTI(fctiduz, uint64_round_to_zero, 0xULL)
>  #endif
>  
> -#if defined(TARGET_PPC64)
> -
>  #define FPU_FCFI(op, cvtr, is_single)  \
>  uint64_t helper_##op(CPUPPCState *env, uint64_t arg)   \
>  {  \
> @@ -674,10 +672,11 @@ uint64_t helper_##op(CPUPPCState *env, uint64_t arg)
>\
>  }
>  
>  FPU_FCFI(fcfid, int64_to_float64, 0)
> +
> +#if defined(TARGET_PPC64)
>  FPU_FCFI(fcfids, int64_to_float32, 1)
>  FPU_FCFI(fcfidu, uint64_to_float64, 0)
>  FPU_FCFI(fcfidus, uint64_to_float32, 1)
> -
>  #endif
>  
>  static inline uint64_t do_fri(CPUPPCState *env, uint64_t arg,
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 509eae5..e51aa69 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -67,14 +67,16 @@ DEF_HELPER_2(fctiw, i64, env, i64)
>  DEF_HELPER_2(fctiwu, i64, env, i64)
>  DEF_HELPER_2(fctiwz, i64, env, i64)
>  DEF_HELPER_2(fctiwuz, i64, env, i64)
> -#if defined(TARGET_PPC64)
>  DEF_HELPER_2(fcfid, i64, env, i64)
> +#if defined(TARGET_PPC64)
>  DEF_HELPER_2(fcfidu, i64, env, i64)
>  DEF_HELPER_2(fcfids, i64, env, i64)
>  DEF_HELPER_2(fcfidus, i64, env, i64)
> +#endif
>  DEF_HELPER_2(fctid, i64, env, i64)
> -DEF_HELPER_2(fctidu, i64, env, i64)
>  DEF_HELPER_2(fctidz, i64, env, i64)
> +#if defined(TARGET_PPC64)
> +DEF_HELPER_2(fctidu, i64, env, i64)
>  DEF_HELPER_2(fctiduz, i64, env, i64)
>  #endif
>  DEF_HELPER_2(frsp, i64, env, i64)
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index c07bb01..6af25fe 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -2246,21 +2246,23 @@ GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT);
>  GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206);
>  /* frsp */
>  GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT);
> -#if defined(TARGET_PPC64)
>  /* fcfid */
> -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B);
> +GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_FLOAT_64|PPC_64B);
> +#if defined(TARGET_PPC64)
>  /* fcfids */
>  GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206);
>  /* fcfidu */
>  GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206);
>  /* fcfidus */
>  GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206);
> +#endif
>  /* fctid */
> -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B);
> +GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_FLOAT_64|PPC_64B);
> +/* fctidz */
> +GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_FLOAT_64|PPC_64B);
> +#if defined(TARGET_PPC64)
>  /* fctidu */
>  GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206);
> -/* fctidz */
> -GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_64B);
>  /* fctidu */
>  GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206);
>  #endif
> @@ -10050,14 +10052,16 @@ GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, 
> PPC_NONE, PPC2_FP_CVT_ISA206),
>  GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT),
>  GEN_HANDLER_E(fctiwuz, 0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
>  GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT),
> +GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_FLOAT_64|PPC_64B),
>  #if defined(TARGET_PPC64)
> -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B),
>  GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
>  GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
>  GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
> -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B),
> +#endif
> +GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_FLOAT_64|PPC_64B),
> +GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_FLOAT_64|PPC_64B),

I think we're better off with only a single bit. Just make all 64bit
CPUs that have an FPU also set PPC_FLOAT_64 and only check for that.


Alex

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Alexander Graf



On 09.09.14 17:59, Paolo Bonzini wrote:
> Il 09/09/2014 17:25, Eric Auger ha scritto:

 Please provide a wrapper like sysbus_register_dynamic that takes the
 params/address_space_mem/mpic as parameter.  platform_bus_init_notify
 and DynSysbusNotifier can remain hidden within the .c file.
>> Sorry I do not catch what you mean here. platform_bus_init_notify &
>> DynSysbusNotifier are currently used in virt.c to initialize & register
>> the machine_init_done_notifier.
> 
> Yeah, please do the registration in sysbus.c, not in virt.c.  There is
> no reason to make the platform_bus_init_notify+DynSysbusNotifier
> interface public.  The code in sysbus.c can fill in the fields.

Sysbus != Platform bus. Sysbus is an in-QEMU representation of a
pseudo-bus that we put all devices onto that we consider unsorted.

Platform bus is a machine representation of an actual bus that devices
are attached to. These devices usually are sysbus devices.


Alex

Re: [Qemu-devel] [PATCH v5 05/22] block: Convert bdrv_em_aiocb_info.cancel to .cancel_async

2014-09-10 Thread Fam Zheng

On Wed, 09/10 10:20, Paolo Bonzini wrote:
> Il 10/09/2014 07:59, Fam Zheng ha scritto:
> > @@ -4679,6 +4679,9 @@ static void bdrv_aio_cancel_em(BlockDriverAIOCB 
> > *blockacb)
> >  {
> >  BlockDriverAIOCBSync *acb =
> >  container_of(blockacb, BlockDriverAIOCBSync, common);
> > +
> > +acb->ret = -ECANCELED;
> > +acb->common.cb(acb->common.opaque, acb->ret);
> >  qemu_bh_delete(acb->bh);
> >  acb->bh = NULL;
> >  qemu_aio_release(acb);
> 
> This could call the callback before I/O is finished.  I/O can then
> complete and write to disk stuff that was not meant to be written.

I think the request is already completed when bdrv_aio_rw_vector returns this
blockacb. I shouldn't override the return code anyway, but perhaps a nop
bdrv_aio_cancel_em is better.

> 
> I think there is a pre-existing bug, which should be fixed with a "bool
> *done" member similar to BlockDriverAIOCBCoroutine's.  But for the sake
> of conversion to async cancellation, you can just empty bdrv_aio_cancel_em.
> 

BTW, why is it "bool *done" instead of just "bool done"?

Fam

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 11:31, Alexander Graf ha scritto:
>> > Yeah, please do the registration in sysbus.c, not in virt.c.  There is
>> > no reason to make the platform_bus_init_notify+DynSysbusNotifier
>> > interface public.  The code in sysbus.c can fill in the fields.
> Sysbus != Platform bus. Sysbus is an in-QEMU representation of a
> pseudo-bus that we put all devices onto that we consider unsorted.
> 
> Platform bus is a machine representation of an actual bus that devices
> are attached to. These devices usually are sysbus devices.

Is there any difference between the two?

Take a machine that has two chips, a SoC that does everything except
USB, and a USB controller chip.

Strictly speaking the USB controller chip would be on a "platform bus",
but we would likely put it on sysbus.

Why should it matter whether the devices are static or dynamic, for the
sake of calling something the "system" or the "platform" bus?  I would
say that QEMU calls "sysbus" the platform bus.

Some devices (e.g. the local APIC in x86, or the in-core timers and GIC
in ARM) should probably not be in sysbus at all, and should attach
directly to the CPU address space.  But that is a quirk in the modeling
of those devices, it shouldn't mean that sysbus is not a "platform" bus.

Paolo

Re: [Qemu-devel] [PATCH v5 05/22] block: Convert bdrv_em_aiocb_info.cancel to .cancel_async

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 11:36, Fam Zheng ha scritto:
>> > 
>> > This could call the callback before I/O is finished.  I/O can then
>> > complete and write to disk stuff that was not meant to be written.
> I think the request is already completed when bdrv_aio_rw_vector returns this
> blockacb. I shouldn't override the return code anyway, but perhaps a nop
> bdrv_aio_cancel_em is better.

Note that the legacy bdrv_read/bdrv_write function calls actually are
AIO-friendly (they run in a coroutine, and can yield).

> > I think there is a pre-existing bug, which should be fixed with a "bool
> > *done" member similar to BlockDriverAIOCBCoroutine's.  But for the sake
> > of conversion to async cancellation, you can just empty bdrv_aio_cancel_em.
> 
> BTW, why is it "bool *done" instead of just "bool done"?

Because, until your patches to add reference counting, this would have
caused a dangling pointer in bdrv_aio_co_cancel_em:

acb->done = true;
qemu_bh_delete(acb->bh);
qemu_aio_release(acb);

instead, using "bool *done" works because bdrv_co_em_bh writes into the
variable of bdrv_aio_co_cancel_em.  This assumes that bdrv_aio_cancel is
only called once (no reentrancy).

Paolo

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 10:38:46AM +0200, Paolo Bonzini wrote:
> Il 09/09/2014 22:51, Michael S. Tsirkin ha scritto:
> > > i440FX/PIIX3 state is loaded before i8259, so the interrupt will never
> > > be in the i8259 ISR.  I am not sure why it is a problem for
> > > record/replay, but I think it's plausible to consider this a bug.  i8259
> > > state should not be affected by the load of PIIX3 state, since i8259 is
> > > migrated separately.
> > 
> > Sorry I still don't understand. Why do stuff from vmstate callback then?
> > How is it different?
> 
> Reconstructing internal state from post_load is okay.
> 
> What is not okay (and I think it should be a rule) is to touch other
> devices from post_load, unless you know that they are deserialized
> first.  For example it's okay for a PCI device to talk to the parent
> bridge in its post_load function.
> 
> In the case of PIIX3 vs. i8259, however, you know that i8259 is
> deserialized _last_ because i8259 is an ISA device and PIIX3 provides
> the ISA bus.  So it's incorrect, even though it's currently harmless, to
> touch the i8259 before it's deserialized.

OK, got this, thanks for the explanation!
So the reason i8259 might be out of sync is
because it's not yet deserialized.

I think it's a good idea to put (at least the
last part) in the commit log.
Also it's updating irq state, not just raising irq,
that might be problematic, right?

So also, something like this for the comment:
+/* We update irq levels in PIIX3 but don't set IRQ, since
+ * IRQ state is serialized separately through the i8259,
+ * which is not deserialized yet, at this point.
+ */





> > I'd like to see a description of a scenario where this patch makes
> > a difference.
> 
> Of course it would be nice to have testcases for this, but I guess one
> case could be:
> 
> - LAPIC configured in ExtINT mode
> 
> - interrupts are masked in the i8259, but the i8259 doesn't know that
> yet because it's not been loaded yet
> 
> - the PIIX3 loads the state and the interrupt is set.  pic_set_irq is
> called, calls pic_update_irq
> 
> - pic_update_irq calls pic_get_irq, which uses IMR=0 and thus raises LINT0
> 
> - the APIC has been loaded already, so LINT0 is injected incorrectly
> 
> 
> Another case could be:
> 
> - i8259 is processing IRQ0.  The lower-priority interrupt from PIIX3 is
> in IRR.  Machine is migrated.
> 
> - the PIIX3 loads the state and sets the interrupt in the i8259.
> pic_set_irq is called, calls pic_update_irq, calls pic_get_irq
> 
> - because i8259 has not been loaded yet, pic_get_irq sees ISR=0 and the
> interrupt is injected even though IRQ0 (higher priority) is being serviced.
> 
> 
> In both cases, the saved i8259 state will have the PIIX3 interrupt in
> IRR, so the interrupt is not lost, just held (as it would have been on
> the source machine).
> 
> Paolo

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 02:45:51PM +0530, Nikunj A Dadhania wrote:
> "Michael S. Tsirkin"  writes:
> 
> > On Wed, Sep 10, 2014 at 01:44:49PM +0530, Nikunj A Dadhania wrote:
> >> Greg Kurz  writes:
> >> 
> >> > On Mon, 8 Sep 2014 19:05:02 +0300
> >> > "Michael S. Tsirkin"  wrote:
> >> >
> >> >> commit cc943c36faa192cd4b32af8fe5edb31894017d35
> >> >> pci: Use bus master address space for delivering MSI/MSI-X messages
> >> >> breaks virtio-net for rhel6.[56] x86 guests because they don't
> >> >> enable bus mastering for virtio PCI devices
> >> >> 
> >> >> Old guests forgot to enable bus mastering, enable it
> >> >> automatically on DRIVER_OK.
> >> >> 
> >> >> Note: we should either back out the original patch from
> >> >> stable or apply this one on top.
> >> >> 
> >> >> Cc: qemu-sta...@nongnu.org
> >> >> Reported-by: Greg Kurz 
> >> >> Signed-off-by: Jan Kiszka 
> >> >> Signed-off-by: Michael S. Tsirkin 
> >> >> ---
> >> >>  hw/virtio/virtio-pci.c | 2 ++
> >> >>  1 file changed, 2 insertions(+)
> >> >> 
> >> >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> >> >> index ddb5da1..af937d2 100644
> >> >> --- a/hw/virtio/virtio-pci.c
> >> >> +++ b/hw/virtio/virtio-pci.c
> >> >> @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, 
> >> >> uint32_t addr, uint32_t val)
> >> >>  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
> >> >>  !(proxy->pci_dev.config[PCI_COMMAND] & 
> >> >> PCI_COMMAND_MASTER)) {
> >> >>  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> >> >> +
> >> >> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> >> >> +  true);
> >> >>  }
> >> >>  break;
> >> >>  case VIRTIO_MSI_CONFIG_VECTOR:
> >> >
> >> > Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel expertise.
> >> >
> >> > Michael,
> >> >
> >> > This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
> >> > Unfortunately,
> >> > this fails for rhel6.5 ppc64 because it is never called... 
> >> 
> >> > I did some debugging: it looks like the guest kernel calls the OF
> >> > quiesce call to flush pending DMA and disables bus master on the
> >> > virtio-blk device (PCI_COMMAND == 0x3).
> >> 
> >> Getting confused, above you are talking about virtio-net and here it is
> >> virtio-blk.
> >> 
> >> Anyways, the routines still remain the same for both of them.  From SLOF
> >> during init we set DRIVER_OK, and after using the device during the
> >> quiesce, called from linux kernel VIRTIO_CONFIG_S_FAILED is set and then
> >> a VIRTIO_DEVICE_RESET is done.
> >
> > BTW, you really should start enabling bus mastering, avoid relying
> > on the work-around we have for broken guests.
> 
> In SLOF, we do enable PCI MASTER during device scanning and then later
> disable it.
> 
> Regards
> Nikunj

But device is then reset, right Greg?
You get as far as reset?

If yes I doubt something that happens before reset
matters, unless we are leaking some state
across reset which would be a bug in itself.

Re: [Qemu-devel] [PATCH 02/23] block: New BlockBackend

2014-09-10 Thread Kevin Wolf

Am 10.09.2014 um 10:13 hat Markus Armbruster geschrieben:
> A block device consists of a frontend device model and a backend.
> 
> A block backend has a tree of block drivers doing the actual work.
> The tree is managed by the block layer.
> 
> We currently use a single abstraction BlockDriverState both for tree
> nodes and the backend as a whole.  Drawbacks:
> 
> * Its API includes both stuff that makes sense only at the block
>   backend level (root of the tree) and stuff that's only for use
>   within the block layer.  This makes the API bigger and more complex
>   than necessary.  Moreover, it's not obvious which interfaces are
>   meant for device models, and which really aren't.
> 
> * Since device models keep a reference to their backend, the backend
>   object can't just be destroyed.  But for media change, we need to
>   replace the tree.  Our solution is to make the BlockDriverState
>   generic, with actual driver state in a separate object, pointed to
>   by member opaque.  That lets us replace the tree by deinitializing
>   and reinitializing its root.  This special need of the root makes
>   the data structure awkward everywhere in the tree.
> 
> The general plan is to separate the APIs into "block backend", for use
> by device models, monitor and whatever other code dealing with block
> backends, and "block driver", for use by the block layer and whatever
> other code (if any) dealing with trees and tree nodes.
> 
> Code dealing with block backends, device models in particular, should
> become completely oblivious of BlockDriverState.  This should let us
> clean up both APIs, and the tree data structures.
> 
> This commit is a first step.  It creates a minimal "block backend"
> API: type BlockBackend and functions to create, destroy and find them.
> BlockBackend objects are created and destroyed, but not yet used for
> anything; that'll come shortly.
> 
> BlockBackend is reference-counted.  Its reference count never exceeds
> one so far, but that's going to change.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  block/Makefile.objs|   2 +-
>  block/block-backend.c  | 110 
> +
>  blockdev.c |  10 +++-
>  hw/block/xen_disk.c|  11 +
>  include/qemu/typedefs.h|   1 +
>  include/sysemu/block-backend.h |  26 ++
>  qemu-img.c |  46 +
>  qemu-io.c  |   8 +++
>  qemu-nbd.c |   3 +-
>  9 files changed, 214 insertions(+), 3 deletions(-)
>  create mode 100644 block/block-backend.c
>  create mode 100644 include/sysemu/block-backend.h
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index f45f939..a70140b 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -5,7 +5,7 @@ block-obj-y += qed-check.o
>  block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
>  block-obj-$(CONFIG_QUORUM) += quorum.o
>  block-obj-y += parallels.o blkdebug.o blkverify.o
> -block-obj-y += snapshot.o qapi.o
> +block-obj-y += block-backend.o snapshot.o qapi.o
>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> diff --git a/block/block-backend.c b/block/block-backend.c
> new file mode 100644
> index 000..833f7d9
> --- /dev/null
> +++ b/block/block-backend.c
> @@ -0,0 +1,110 @@
> +/*
> + * QEMU Block backends
> + *
> + * Copyright (C) 2014 Red Hat, Inc.
> + *
> + * Authors:
> + *  Markus Armbruster ,
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> + * later.  See the COPYING file in the top-level directory.
> + */

I think we still have the long-term plan of exposing a block layer
library that can be consumed by libvirt. As the usage in qemu-io/img/nbd
shows, this will probably have to use BlockBackends, so this code is part
of the block layer core.

Considering this, using the LGPL would be more practical. Can you please
make this change for v2? (Personally, I would have used the MIT license
that the rest of the block layer uses, which also makes copying code
around cleaner license-wise, but I know you dislike it.)

> +#include "sysemu/block-backend.h"
> +#include "block/block_int.h"
> +
> +struct BlockBackend {
> +char *name;
> +int refcnt;
> +QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
> +};
> +
> +static QTAILQ_HEAD(, BlockBackend) blk_backends =
> +QTAILQ_HEAD_INITIALIZER(blk_backends);
> +
> +/**
> + * blk_new:
> + * @name: name, must not be %NULL or empty
> + * @errp: return location for an error to be set on failure, or %NULL
> + *
> + * Create a new BlockBackend, with a reference count of one.  Fail if
> + * @name already exists.
> + *
> + * Returns: the BlockBackend on success, %NULL on failure
> + */
> +BlockBackend *blk_new(const char *name, Error **errp)
> +{
> +BlockBackend *blk = g_new0(BlockBackend, 1);
> +
> +assert(name && name[0]);

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Alexander Graf



On 10.09.14 11:43, Paolo Bonzini wrote:
> Il 10/09/2014 11:31, Alexander Graf ha scritto:
 Yeah, please do the registration in sysbus.c, not in virt.c.  There is
 no reason to make the platform_bus_init_notify+DynSysbusNotifier
 interface public.  The code in sysbus.c can fill in the fields.
>> Sysbus != Platform bus. Sysbus is an in-QEMU representation of a
>> pseudo-bus that we put all devices onto that we consider unsorted.
>>
>> Platform bus is a machine representation of an actual bus that devices
>> are attached to. These devices usually are sysbus devices.
> 
> Is there any difference between the two?
> 
> Take a machine that has two chips, a SoC that does everything except
> USB, and a USB controller chip.
> 
> Strictly speaking the USB controller chip would be on a "platform bus",
> but we would likely put it on sysbus.
> 
> Why should it matter whether the devices are static or dynamic, for the
> sake of calling something the "system" or the "platform" bus?  I would
> say that QEMU calls "sysbus" the platform bus.
> 
> Some devices (e.g. the local APIC in x86, or the in-core timers and GIC
> in ARM) should probably not be in sysbus at all, and should attach
> directly to the CPU address space.  But that is a quirk in the modeling
> of those devices, it shouldn't mean that sysbus is not a "platform" bus.

On e500 for example, we have a predefined CCSR region. That is a machine
defined "platform bus". The offsets inside that region are strictly
defined by the spec.

Now take the serial ports. We have space for 2 serial ports inside of
that CCSR region. We can spawn these 2 ports in the machine file based
on -serial, but if you want to spawn them with -device, how do you tell
the machine whether they should go into the "big bucket platform bus" or
the "CCSR platform bus"?

In fact, thinking about this a bit more, maybe we should just have an
actual bus structure. Then we could have the legacy "big bucket" sysbus
bus that nobody may ever dynamically put devices into. For CCSR we could
create another bucket that the machine file can control where each
device goes and can also detect if a device doesn't fit. And then we
just declare the virt "platform bus" sysbus the default bus for cmdline
-device sysbusdevice and everything resolves automatically.


Alex

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 12:50, Michael S. Tsirkin ha scritto:
> OK, got this, thanks for the explanation!
> So the reason i8259 might be out of sync is
> because it's not yet deserialized.

Yes, especially the IMR/IRR/ISR fields.

> I think it's a good idea to put (at least the
> last part) in the commit log.

Like this:

This patch disables raising an irq while loading the state of PCI bridge.
Because the i8259 has not been deserialized yet, raising an interrupt
could bring the system out-of-sync with the migration source.  For example,
the migration source could have masked the interrupt in the i8259. On the
destination, the i8259 device model would not know that yet and would
trigger an interrupt in the CPU.

This patch eliminates raising an irq and just restores the calculated
state fields in post_load function.  Interrupt state will be deserialized
separately through the IRR field of the i8259.

> Also it's updating irq state, not just raising irq,
> that might be problematic, right?

Well, the i8259 is in the reset state so ISR=IRR=0, aka all IRQ lines 
are known to be low.  But yes, in general it's the update that is 
problematic.

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 11:58:34AM +0200, Paolo Bonzini wrote:
> Il 10/09/2014 12:50, Michael S. Tsirkin ha scritto:
> > OK, got this, thanks for the explanation!
> > So the reason i8259 might be out of sync is
> > because it's not yet deserialized.
> 
> Yes, especially the IMR/IRR/ISR fields.
> 
> > I think it's a good idea to put (at least the
> > last part) in the commit log.
> 
> Like this:
> 
> This patch disables raising an irq while loading the state of PCI bridge.
> Because the i8259 has not been deserialized yet, raising an interrupt
> could bring the system out-of-sync with the migration source.  For 
> example,
> the migration source could have masked the interrupt in the i8259. On the
> destination, the i8259 device model would not know that yet and would
> trigger an interrupt in the CPU.
> 
> This patch eliminates raising an irq and just restores the calculated
> state fields in post_load function.  Interrupt state will be deserialized
> separately through the IRR field of the i8259.

Yes, thanks!
Except imho it's a bit better to s/raising/setting/ in the last paragraph.

> > Also it's updating irq state, not just raising irq,
> > that might be problematic, right?
> 
> Well, the i8259 is in the reset state so ISR=IRR=0, aka all IRQ lines 
> are known to be low.

By luck, yes.

>  But yes, in general it's the update that is 
> problematic.

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 11:56, Alexander Graf ha scritto:
> 
> 
> On 10.09.14 11:43, Paolo Bonzini wrote:
>> Il 10/09/2014 11:31, Alexander Graf ha scritto:
>>>>> Yeah, please do the registration in sysbus.c, not in virt.c.  There is
>>>>> no reason to make the platform_bus_init_notify+DynSysbusNotifier
>>>>> interface public.  The code in sysbus.c can fill in the fields.
>>> Sysbus != Platform bus. Sysbus is an in-QEMU representation of a
>>> pseudo-bus that we put all devices onto that we consider unsorted.
>>>
>>> Platform bus is a machine representation of an actual bus that devices
>>> are attached to. These devices usually are sysbus devices.
>>
>> Is there any difference between the two?
>>
>> Take a machine that has two chips, a SoC that does everything except
>> USB, and a USB controller chip.
>>
>> Strictly speaking the USB controller chip would be on a "platform bus",
>> but we would likely put it on sysbus.
>>
>> Why should it matter whether the devices are static or dynamic, for the
>> sake of calling something the "system" or the "platform" bus?  I would
>> say that QEMU calls "sysbus" the platform bus.
>>
>> Some devices (e.g. the local APIC in x86, or the in-core timers and GIC
>> in ARM) should probably not be in sysbus at all, and should attach
>> directly to the CPU address space.  But that is a quirk in the modeling
>> of those devices, it shouldn't mean that sysbus is not a "platform" bus.
> 
> On e500 for example, we have a predefined CCSR region. That is a machine
> defined "platform bus". The offsets inside that region are strictly
> defined by the spec.
> 
> Now take the serial ports. We have space for 2 serial ports inside of
> that CCSR region. We can spawn these 2 ports in the machine file based
> on -serial, but if you want to spawn them with -device, how do you tell
> the machine whether they should go into the "big bucket platform bus" or
> the "CCSR platform bus"?

Two possibilities:

1) you would use two instances of sysbus (one default, one created by
the board) and specify ",bus=ccsr" on the command line when you want to
add the device to the CCSR region.

The two would work exactly the same way, only with different algorithms
for resource allocation.

2) similar to ISA, you would create a new ccsr-bus device and a new
ccsr-serial device, and use -device ccsr-serial,index=[0|1],chardev=foo
to specify which of the two serial ports this is for.  Most of the fdt
magic could be shared by the sysbus and CCSR cases.

I think I prefer (2)...

Paolo

> In fact, thinking about this a bit more, maybe we should just have an
> actual bus structure. Then we could have the legacy "big bucket" sysbus
> bus that nobody may ever dynamically put devices into. For CCSR we could
> create another bucket that the machine file can control where each
> device goes and can also detect if a device doesn't fit. And then we
> just declare the virt "platform bus" sysbus the default bus for cmdline
> -device sysbusdevice and everything resolves automatically.

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 13:04, Michael S. Tsirkin ha scritto:
>> > This patch disables raising an irq while loading the state of PCI 
>> > bridge.
>> > Because the i8259 has not been deserialized yet, raising an interrupt
>> > could bring the system out-of-sync with the migration source.  For 
>> > example,
>> > the migration source could have masked the interrupt in the i8259. On 
>> > the
>> > destination, the i8259 device model would not know that yet and would
>> > trigger an interrupt in the CPU.
>> > 
>> > This patch eliminates raising an irq and just restores the calculated
>> > state fields in post_load function.  Interrupt state will be 
>> > deserialized
>> > separately through the IRR field of the i8259.
> Yes, thanks!
> Except imho it's a bit better to s/raising/setting/ in the last paragraph.

And pretty much everywhere else, not just in the last paragraph.

Thanks!

Paolo

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 11:56, Alexander Graf ha scritto:
> 
> 
> On 10.09.14 11:43, Paolo Bonzini wrote:
>> Il 10/09/2014 11:31, Alexander Graf ha scritto:
>>>>> Yeah, please do the registration in sysbus.c, not in virt.c.  There is
>>>>> no reason to make the platform_bus_init_notify+DynSysbusNotifier
>>>>> interface public.  The code in sysbus.c can fill in the fields.
>>> Sysbus != Platform bus. Sysbus is an in-QEMU representation of a
>>> pseudo-bus that we put all devices onto that we consider unsorted.
>>>
>>> Platform bus is a machine representation of an actual bus that devices
>>> are attached to. These devices usually are sysbus devices.
>>
>> Is there any difference between the two?
>>
>> Take a machine that has two chips, a SoC that does everything except
>> USB, and a USB controller chip.
>>
>> Strictly speaking the USB controller chip would be on a "platform bus",
>> but we would likely put it on sysbus.
>>
>> Why should it matter whether the devices are static or dynamic, for the
>> sake of calling something the "system" or the "platform" bus?  I would
>> say that QEMU calls "sysbus" the platform bus.
>>
>> Some devices (e.g. the local APIC in x86, or the in-core timers and GIC
>> in ARM) should probably not be in sysbus at all, and should attach
>> directly to the CPU address space.  But that is a quirk in the modeling
>> of those devices, it shouldn't mean that sysbus is not a "platform" bus.
> 
> On e500 for example, we have a predefined CCSR region. That is a machine
> defined "platform bus". The offsets inside that region are strictly
> defined by the spec.
> 
> Now take the serial ports. We have space for 2 serial ports inside of
> that CCSR region. We can spawn these 2 ports in the machine file based
> on -serial, but if you want to spawn them with -device, how do you tell
> the machine whether they should go into the "big bucket platform bus" or
> the "CCSR platform bus"?

Two possibilities:

1) you would use two instances of sysbus (one default, one created by
the board) and specify ",bus=ccsr" on the command line when you want to
add the device to the CCSR region.

The two would work exactly the same way, only with different algorithms
for resource allocation.

2) similar to ISA, you would create a new ccsr-bus device and a new
ccsr-serial device, and use -device ccsr-serial,index=[0|1],chardev=foo
to specify which of the two serial ports this is for.  Perhaps some of
the fdt magic could be shared by the sysbus and CCSR cases.

Paolo

> In fact, thinking about this a bit more, maybe we should just have an
> actual bus structure. Then we could have the legacy "big bucket" sysbus
> bus that nobody may ever dynamically put devices into. For CCSR we could
> create another bucket that the machine file can control where each
> device goes and can also detect if a device doesn't fit. And then we
> just declare the virt "platform bus" sysbus the default bus for cmdline
> -device sysbusdevice and everything resolves automatically.

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Alexander Graf



On 10.09.14 12:05, Paolo Bonzini wrote:
> Il 10/09/2014 11:56, Alexander Graf ha scritto:
>>
>>
>> On 10.09.14 11:43, Paolo Bonzini wrote:
>>> Il 10/09/2014 11:31, Alexander Graf ha scritto:
>>>>>> Yeah, please do the registration in sysbus.c, not in virt.c.  There is
>>>>>> no reason to make the platform_bus_init_notify+DynSysbusNotifier
>>>>>> interface public.  The code in sysbus.c can fill in the fields.
>>>> Sysbus != Platform bus. Sysbus is an in-QEMU representation of a
>>>> pseudo-bus that we put all devices onto that we consider unsorted.
>>>>
>>>> Platform bus is a machine representation of an actual bus that devices
>>>> are attached to. These devices usually are sysbus devices.
>>>
>>> Is there any difference between the two?
>>>
>>> Take a machine that has two chips, a SoC that does everything except
>>> USB, and a USB controller chip.
>>>
>>> Strictly speaking the USB controller chip would be on a "platform bus",
>>> but we would likely put it on sysbus.
>>>
>>> Why should it matter whether the devices are static or dynamic, for the
>>> sake of calling something the "system" or the "platform" bus?  I would
>>> say that QEMU calls "sysbus" the platform bus.
>>>
>>> Some devices (e.g. the local APIC in x86, or the in-core timers and GIC
>>> in ARM) should probably not be in sysbus at all, and should attach
>>> directly to the CPU address space.  But that is a quirk in the modeling
>>> of those devices, it shouldn't mean that sysbus is not a "platform" bus.
>>
>> On e500 for example, we have a predefined CCSR region. That is a machine
>> defined "platform bus". The offsets inside that region are strictly
>> defined by the spec.
>>
>> Now take the serial ports. We have space for 2 serial ports inside of
>> that CCSR region. We can spawn these 2 ports in the machine file based
>> on -serial, but if you want to spawn them with -device, how do you tell
>> the machine whether they should go into the "big bucket platform bus" or
>> the "CCSR platform bus"?
> 
> Two possibilities:
> 
> 1) you would use two instances of sysbus (one default, one created by
> the board) and specify ",bus=ccsr" on the command line when you want to
> add the device to the CCSR region.
> 
> The two would work exactly the same way, only with different algorithms
> for resource allocation.
> 
> 2) similar to ISA, you would create a new ccsr-bus device and a new
> ccsr-serial device, and use -device ccsr-serial,index=[0|1],chardev=foo
> to specify which of the two serial ports this is for.  Most of the fdt
> magic could be shared by the sysbus and CCSR cases.
> 
> I think I prefer (2)...

Fair enough.

As far as moving "platform bus" logic into sysbus, I'd really like to
hold back and see what this whole thing ends up getting used for first.

So for now, I'd definitely prefer to keep "platform bus" logic and
"sysbus" logic separate. If we realize that every user only ever uses
the dynamic sysbus creation in conjunction with our "platform bus"
implementation, we can merge them.


Alex

Re: [Qemu-devel] [PATCH 05/23] block: Make BlockBackend own its BlockDriverState

2014-09-10 Thread Kevin Wolf

Am 10.09.2014 um 10:13 hat Markus Armbruster geschrieben:
> On BlockBackend destruction, unref its BlockDriverState.  Replaces the
> callers' unrefs.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  block/block-backend.c |  9 ++---
>  blockdev.c| 11 +++
>  hw/block/xen_disk.c   |  6 +++---
>  qemu-img.c| 35 +--
>  qemu-io.c |  5 -
>  5 files changed, 9 insertions(+), 57 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 2a22660..ae51f7f 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -58,10 +58,7 @@ BlockBackend *blk_new(const char *name, Error **errp)
>   * @errp: return location for an error to be set on failure, or %NULL
>   *
>   * Create a new BlockBackend, with a reference count of one, and
> - * attach a new BlockDriverState to it, also with a reference count of
> - * one.  Caller owns *both* references.
> - * TODO Let caller own only the BlockBackend reference
> - * Fail if @name already exists.
> + * a new BlockDriverState attached.  Fail if @name already exists.
>   *
>   * Returns: the BlockBackend on success, %NULL on error
>   */
> @@ -88,6 +85,7 @@ BlockBackend *blk_new_with_bs(const char *name, Error 
> **errp)
>  static void blk_delete(BlockBackend *blk)
>  {
>  assert(!blk->refcnt);
> +bdrv_unref(blk->bs);
>  blk_detach_bs(blk);

I think the bdrv_unref() should really be part of blk_detach_bs().

The same way it would be more logical to have bdrv_ref() as part of
blk_attach_bs(). For blk_new_with_bs() this might mean bdrv_new,
blk_attach_bs, bdrv_unref, which looks a bit odd, but if blk_attach_bs()
is ever called from somewhere else, it probably makes more sense (if it
isn't, it should be static).

Kevin

Re: [Qemu-devel] [PATCH 08/10] pckbd: adding new fields to vmstate

2014-09-10 Thread Paolo Bonzini

Il 09/09/2014 15:07, Juan Quintela ha scritto:
> Paolo Bonzini  wrote:
>> From: Pavel Dovgalyuk 
>>
>> This patch adds outport to VMState to allow correct saving and restoring
>> the state of PC keyboard controller.
>>
>> Signed-off-by: Pavel Dovgalyuk 
>> Signed-off-by: Paolo Bonzini 
> 
> Acked-by: Juan Quintela 
>> ---
>>  hw/input/pckbd.c | 51 +++
>>  1 file changed, 51 insertions(+)
>>
>> diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
>> index 2ab8c87..2b0cd3d 100644
>> --- a/hw/input/pckbd.c
>> +++ b/hw/input/pckbd.c
>> @@ -131,6 +131,7 @@ typedef struct KBDState {
>>  uint8_t status;
>>  uint8_t mode;
>>  uint8_t outport;
>> +bool outport_present;
> 
> I don't like this one, but 
> 
> 
>>  /* Bitmask of devices with data available.  */
>>  uint8_t pending;
>>  void *kbd;
>> @@ -367,18 +368,68 @@ static void kbd_reset(void *opaque)
>>  s->mode = KBD_MODE_KBD_INT | KBD_MODE_MOUSE_INT;
>>  s->status = KBD_STAT_CMD | KBD_STAT_UNLOCKED;
>>  s->outport = KBD_OUT_RESET | KBD_OUT_A20;
>> +s->outport_present = false;
>> +}
>> +
>> +static uint8_t kbd_outport_default(KBDState *s)
>> +{
>> +return KBD_OUT_RESET | KBD_OUT_A20
>> +   | (s->status & KBD_STAT_OBF ? KBD_OUT_OBF : 0)
>> +   | (s->status & KBD_STAT_MOUSE_OBF ? KBD_OUT_MOUSE_OBF : 0);
>> +}
>> +
>> +static int kbd_outport_post_load(void *opaque, int version_id)
>> +{
>> +KBDState *s = opaque;
>> +s->outport_present = true;
>> +return 0;
>> +}
>> +
>> +static const VMStateDescription vmstate_kbd_outport = {
>> +.name = "pckbd_outport",
>> +.version_id = 1,
>> +.minimum_version_id = 1,
>> +.post_load = kbd_outport_post_load,
>> +.fields = (VMStateField[]) {
>> +VMSTATE_UINT8(outport, KBDState),
>> +VMSTATE_END_OF_LIST()
>> +}
>> +};
>> +
>> +static bool kbd_outport_needed(void *opaque)
>> +{
>> +KBDState *s = opaque;
>> +return s->outport != kbd_outport_default(s);
>> +}
>> +
>> +static int kbd_post_load(void *opaque, int version_id)
>> +{
>> +KBDState *s = opaque;
> 
> Only solution that I thought of is putting here:
> 
> 
>  s->outport |=
> | (s->status & KBD_STAT_OBF ? KBD_OUT_OBF : 0)
> | (s->status & KBD_STAT_MOUSE_OBF ? KBD_OUT_MOUSE_OBF : 0);
> 
> 
> But I am not sure if those bits can be off if status bits are on.

Yes, they can---the outport can be written by the guest (see
outport_write).  That was my first thought as well. :)

Paolo

> Thinking about it, why it is that it is not necessary to have on
> postload something like that?
> 
> PD: no, I don't claim to understand how the pc keyboard work ...
> 
> Later, Juan.
> 
>

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 12:09, Alexander Graf ha scritto:
> Fair enough.
> 
> As far as moving "platform bus" logic into sysbus, I'd really like to
> hold back and see what this whole thing ends up getting used for first.
> 
> So for now, I'd definitely prefer to keep "platform bus" logic and
> "sysbus" logic separate. If we realize that every user only ever uses
> the dynamic sysbus creation in conjunction with our "platform bus"
> implementation, we can merge them.

I agree.  As you pointed out, we have two usecases:

1) arbitrary dynamic sysbus devices, because you're playing with board
design or because you're working on a virtualized platform

2) pluggable components in a fixed board design (e.g. CCSR)

The only thing they share is FDT creation.  The other part, which is
assigning the interrupts and memory regions, is different: case (1) has
it driven by command line or simply bottom-to-top; case (2) has it
driven by an implementation of a spec.

It's not even clear to me that E500 CCSR devices should be sysbus, in
fact...

Paolo

Re: [Qemu-devel] [PATCH 09/10] piix: do not raise irq while loading vmstate

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 12:07:22PM +0200, Paolo Bonzini wrote:
> Il 10/09/2014 13:04, Michael S. Tsirkin ha scritto:
> >> > This patch disables raising an irq while loading the state of PCI 
> >> > bridge.
> >> > Because the i8259 has not been deserialized yet, raising an interrupt
> >> > could bring the system out-of-sync with the migration source.  For 
> >> > example,
> >> > the migration source could have masked the interrupt in the i8259. 
> >> > On the
> >> > destination, the i8259 device model would not know that yet and would
> >> > trigger an interrupt in the CPU.
> >> > 
> >> > This patch eliminates raising an irq and just restores the calculated
> >> > state fields in post_load function.  Interrupt state will be 
> >> > deserialized
> >> > separately through the IRR field of the i8259.
> > Yes, thanks!
> > Except imho it's a bit better to s/raising/setting/ in the last paragraph.
> 
> And pretty much everywhere else, not just in the last paragraph.
> 
> Thanks!
> 
> Paolo

Right. With these minor changes, you can attach

Reviewed-by: Michael S. Tsirkin

Re: [Qemu-devel] [PATCH] virtio-pci: enable bus master for old guests

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 11:01:48AM +0200, Greg Kurz wrote:
> On Wed, 10 Sep 2014 12:32:30 +0300
> "Michael S. Tsirkin"  wrote:
> 
> > On Wed, Sep 10, 2014 at 01:44:49PM +0530, Nikunj A Dadhania wrote:
> > > Greg Kurz  writes:
> > > 
> > > > On Mon, 8 Sep 2014 19:05:02 +0300
> > > > "Michael S. Tsirkin"  wrote:
> > > >
> > > >> commit cc943c36faa192cd4b32af8fe5edb31894017d35
> > > >> pci: Use bus master address space for delivering MSI/MSI-X messages
> > > >> breaks virtio-net for rhel6.[56] x86 guests because they don't
> > > >> enable bus mastering for virtio PCI devices
> > > >> 
> > > >> Old guests forgot to enable bus mastering, enable it
> > > >> automatically on DRIVER_OK.
> > > >> 
> > > >> Note: we should either back out the original patch from
> > > >> stable or apply this one on top.
> > > >> 
> > > >> Cc: qemu-sta...@nongnu.org
> > > >> Reported-by: Greg Kurz 
> > > >> Signed-off-by: Jan Kiszka 
> > > >> Signed-off-by: Michael S. Tsirkin 
> > > >> ---
> > > >>  hw/virtio/virtio-pci.c | 2 ++
> > > >>  1 file changed, 2 insertions(+)
> > > >> 
> > > >> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> > > >> index ddb5da1..af937d2 100644
> > > >> --- a/hw/virtio/virtio-pci.c
> > > >> +++ b/hw/virtio/virtio-pci.c
> > > >> @@ -320,6 +320,8 @@ static void virtio_ioport_write(void *opaque, 
> > > >> uint32_t addr, uint32_t val)
> > > >>  if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
> > > >>  !(proxy->pci_dev.config[PCI_COMMAND] & 
> > > >> PCI_COMMAND_MASTER)) {
> > > >>  proxy->flags |= VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
> > > >> +
> > > >> memory_region_set_enabled(&proxy->pci_dev.bus_master_enable_region,
> > > >> +  true);
> > > >>  }
> > > >>  break;
> > > >>  case VIRTIO_MSI_CONFIG_VECTOR:
> > > >
> > > > Cc'ing Alexey for some SLOF and early boot of the ppc64 kernel 
> > > > expertise.
> > > >
> > > > Michael,
> > > >
> > > > This was enough to fix virtio-net in the rhel6.5 x86 guest case. 
> > > > Unfortunately,
> > > > this fails for rhel6.5 ppc64 because it is never called... 
> > > 
> > > > I did some debugging: it looks like the guest kernel calls the OF
> > > > > quiesce call to flush pending DMA and disables bus master on the
> > > > virtio-blk device (PCI_COMMAND == 0x3).
> > > 
> > > Getting confused, above you are talking about virtio-net and here it is
> > > virtio-blk.
> > > 
> > > Anyways, the routines still remain the same for both of them.  From SLOF
> > > during init we set DRIVER_OK, and after using the device during the
> > > quiesce, called from linux kernel VIRTIO_CONFIG_S_FAILED is set and then
> > > a VIRTIO_DEVICE_RESET is done.
> > 
> > BTW, you really should start enabling bus mastering, avoid relying
> > on the work-around we have for broken guests.
> > 
> 
> FWIW during my debug session, I see that SLOF enables bus mastering...
> unfortunately, it gets disabled at some point after the guest kernel
> is started (around the ppc64 prom_init() call).


OK I'm not sure I have all the details but does the patch I sent help
you?

-- 
MST

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Alexander Graf



On 10.09.14 12:21, Paolo Bonzini wrote:
> Il 10/09/2014 12:09, Alexander Graf ha scritto:
>> Fair enough.
>>
>> As far as moving "platform bus" logic into sysbus, I'd really like to
>> hold back and see what this whole thing ends up getting used for first.
>>
>> So for now, I'd definitely prefer to keep "platform bus" logic and
>> "sysbus" logic separate. If we realize that every user only ever uses
>> the dynamic sysbus creation in conjunction with our "platform bus"
>> implementation, we can merge them.
> 
> I agree.  As you pointed out, we have two usecases:
> 
> 1) arbitrary dynamic sysbus devices, because you're playing with board
> design or because you're working on a virtualized platform
> 
> 2) pluggable components in a fixed board design (e.g. CCSR)
> 
> The only thing they share is FDT creation.  The other part, which is
> assigning the interrupts and memory regions, is different: case (1) has
> it driven by command line or simply bottom-to-top; case (2) has it
> driven by an implementation of a spec.
> 
> It's not even clear to me that E500 CCSR devices should be sysbus, in
> fact...

The problem if you continue that thought process is that we'd end up
with 500 different buses and 500 different uart boilerplate devices just
to fit into the respective buses ;).

Otherwise I agree.


Alex

Re: [Qemu-devel] [PATCH 1/6] hw/arm/virt: Provide flash devices for boot ROMs

2014-09-10 Thread Ard Biesheuvel

On 10 September 2014 11:12, Peter Maydell  wrote:
> On 10 September 2014 10:09, Ard Biesheuvel  wrote:
>> On 9 September 2014 20:20, Peter Maydell  wrote:
>>> There were previously questions about whether we should
>>> have flash or RAM at the bottom, but I think it makes
>>> sense just to have a "like vexpress" config with two
>>> flash devices. This does make telling QEMU about backing
>>> storage for the 2nd flash a little complicated, but I
>>> think anybody seriously running a config like that will
>>> be using the management tools layer anyhow.
>
>> You mean having to use -pflash and pad the images out to 64 MB? I
>> wouldn't worry about that.
>
> More particularly that if you don't want to provide backing
> storage for the first flash but only the second, you can't just
> use pflash but have to use the longer -drive options.
>

OK, I will drop this one from my series then.

Re: [Qemu-devel] [PATCH v3 1/6] hw/misc/dyn_sysbus_binding: helpers for sysbus device dynamic binding

2014-09-10 Thread Paolo Bonzini

Il 10/09/2014 12:26, Alexander Graf ha scritto:
> > It's not even clear to me that E500 CCSR devices should be sysbus, in
> > fact...
>
> The problem if you continue that thought process is that we'd end up
> with 500 different buses and 500 different uart boilerplate devices just
> to fit into the respective buses ;).

True.  The alternative is to hardcode the knowledge of the spec in the
management layers (since you cannot do index=0|1, you have to do
something akin to iobase=0x3f8 for the x86 COM1 port).  I guess you will
still need two sysbuses so that you get the correct hierarchy in the
device tree, right?

And we're back to the beginning of the discussion: the distinction
between a "sysbus" and a "platform bus" disappears, and in fact it even
feels more accurate to just call these things "sysbuses"...

Paolo

Re: [Qemu-devel] [PATCH 07/10] serial: fixing vmstate for save/restore

2014-09-10 Thread Paolo Bonzini

Il 09/09/2014 14:30, Paolo Bonzini ha scritto:
> From: Pavel Dovgalyuk 
> 
> Some fields were added to VMState by this patch to preserve correct
> loading of the serial port controller state.
> Updating FCR value while loading was also modified to disable generating
> an interrupt by loadvm.

This is actually not entirely correct because...

> Signed-off-by: Pavel Dovgalyuk 
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/char/serial.c | 265 
> +--
>  1 file changed, 220 insertions(+), 45 deletions(-)
> 
> diff --git a/hw/char/serial.c b/hw/char/serial.c
> index 764e184..2b04927 100644
> --- a/hw/char/serial.c
> +++ b/hw/char/serial.c
> @@ -272,6 +272,64 @@ static gboolean serial_xmit(GIOChannel *chan, 
> GIOCondition cond, void *opaque)
>  }
>  
>  
> +/* Setter for FCR.
> +   is_load flag means, that value is set while loading VM state
> +   and interrupt should not be invoked */
> +static void serial_write_fcr(void *opaque, uint32_t val, int is_load)
> +{
> +SerialState *s = opaque;
> +val = val & 0xFF;
> +
> +if (s->fcr == val) {
> +return;
> +}
> +
> +/* Did the enable/disable flag change? If so, make sure FIFOs get 
> flushed */
> +if ((val ^ s->fcr) & UART_FCR_FE) {
> +val |= UART_FCR_XFR | UART_FCR_RFR;
> +}

... if the value of the FE bit changes, this will nullify the change you
made to send/restore FIFOs.  The handling of RFR/XFR must remain in
serial_ioport_write, and serial_write_fcr must receive the final value
(masked by 0xc9).  I can fix this up.

Paolo

> +/* FIFO clear */
> +
> +if (val & UART_FCR_RFR) {
> +timer_del(s->fifo_timeout_timer);
> +s->timeout_ipending = 0;
> +fifo8_reset(&s->recv_fifo);
> +}
> +
> +if (val & UART_FCR_XFR) {
> +fifo8_reset(&s->xmit_fifo);
> +}
> +
> +if (val & UART_FCR_FE) {
> +s->iir |= UART_IIR_FE;
> +/* Set recv_fifo trigger Level */
> +switch (val & 0xC0) {
> +case UART_FCR_ITL_1:
> +s->recv_fifo_itl = 1;
> +break;
> +case UART_FCR_ITL_2:
> +s->recv_fifo_itl = 4;
> +break;
> +case UART_FCR_ITL_3:
> +s->recv_fifo_itl = 8;
> +break;
> +case UART_FCR_ITL_4:
> +s->recv_fifo_itl = 14;
> +break;
> +}
> +} else {
> +s->iir &= ~UART_IIR_FE;
> +}
> +
> +/* Set fcr - or at least the bits in it that are supposed to "stick" */
> +s->fcr = val & 0xC9;
> +
> +if (!is_load) {
> +serial_update_irq(s);
> +}
> +}
> +
>  static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val,
>  unsigned size)
>  {
> @@ -327,50 +385,7 @@ static void serial_ioport_write(void *opaque, hwaddr 
> addr, uint64_t val,
>  }
>  break;
>  case 2:
> -val = val & 0xFF;
> -
> -if (s->fcr == val)
> -break;
> -
> -/* Did the enable/disable flag change? If so, make sure FIFOs get 
> flushed */
> -if ((val ^ s->fcr) & UART_FCR_FE)
> -val |= UART_FCR_XFR | UART_FCR_RFR;
> -
> -/* FIFO clear */
> -
> -if (val & UART_FCR_RFR) {
> -timer_del(s->fifo_timeout_timer);
> -s->timeout_ipending=0;
> -fifo8_reset(&s->recv_fifo);
> -}
> -
> -if (val & UART_FCR_XFR) {
> -fifo8_reset(&s->xmit_fifo);
> -}
> -
> -if (val & UART_FCR_FE) {
> -s->iir |= UART_IIR_FE;
> -/* Set recv_fifo trigger Level */
> -switch (val & 0xC0) {
> -case UART_FCR_ITL_1:
> -s->recv_fifo_itl = 1;
> -break;
> -case UART_FCR_ITL_2:
> -s->recv_fifo_itl = 4;
> -break;
> -case UART_FCR_ITL_3:
> -s->recv_fifo_itl = 8;
> -break;
> -case UART_FCR_ITL_4:
> -s->recv_fifo_itl = 14;
> -break;
> -}
> -} else
> -s->iir &= ~UART_IIR_FE;
> -
> -/* Set fcr - or at least the bits in it that are supposed to "stick" 
> */
> -s->fcr = val & 0xC9;
> -serial_update_irq(s);
> +serial_write_fcr(s, val, 0);
>  break;
>  case 3:
>  {
> @@ -590,6 +605,14 @@ static void serial_pre_save(void *opaque)
>  s->fcr_vmstate = s->fcr;
>  }
>  
> +static int serial_pre_load(void *opaque)
> +{
> +SerialState *s = (SerialState *)opaque;
> +s->thr_ipending = -1;
> +s->poll_msl = -1;
> +return 0;
> +}
> +
>  static int serial_post_load(void *opaque, int version_id)
>  {
>  SerialState *s = opaque;
> @@ -597,17 +620,139 @@ static int serial_post_load(void *opaque, int 
> version_id)
>  if (version_id < 3) {
>  s->fcr_vmstate = 0;
>  }
> +if (s->thr_ipending == -1) {
> +s->thr_ipending = ((s->

Re: [Qemu-devel] [PATCH] hw/arm/virt: fix pl011 and pl031 irq flags

2014-09-10 Thread Christoffer Dall

On Tue, Sep 09, 2014 at 03:53:43PM +0100, Peter Maydell wrote:
> The pl011 and pl031 devices both use level triggered interrupts,
> but the device tree we construct was incorrectly telling the
> kernel to configure the GIC to treat them as edge triggered.
> This meant that output from the pl011 would hang after a while.
> 
> Signed-off-by: Peter Maydell 
> Cc: qemu-sta...@nongnu.org
> ---
> Thanks to Christoffer Dall for figuring out the cause of the hangs here.
> 
>  hw/arm/virt.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index e8f231e..1b343f0 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -371,7 +371,7 @@ static void create_uart(const VirtBoardInfo *vbi, 
> qemu_irq *pic)
>   2, base, 2, size);
>  qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
> GIC_FDT_IRQ_TYPE_SPI, irq,
> -   GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
> +   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
>  qemu_fdt_setprop_cells(vbi->fdt, nodename, "clocks",
> vbi->clock_phandle, vbi->clock_phandle);
>  qemu_fdt_setprop(vbi->fdt, nodename, "clock-names",
> @@ -398,7 +398,7 @@ static void create_rtc(const VirtBoardInfo *vbi, qemu_irq 
> *pic)
>   2, base, 2, size);
>  qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
> GIC_FDT_IRQ_TYPE_SPI, irq,
> -   GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
> +   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
>  qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
>  qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
>  g_free(nodename);
> -- 
> 1.9.1
> 
I've been trying to figure out why we would see this particular hang
with SMP and not UP (or maybe it is just much less likely to
happen with UP), but haven't been able to come up with a sequence of
events to support this yet.  It also worries me that we weren't seeing
this with KVM, since it indicates that we're either doing something
wrong in the KVM or QEMU GIC emulation code, potentially.

In any case, this patch is correct, so:

Acked-by: Christoffer Dall

Re: [Qemu-devel] [PATCH] hw/arm/virt: fix pl011 and pl031 irq flags

2014-09-10 Thread Ard Biesheuvel

On 10 September 2014 12:43, Christoffer Dall
 wrote:
> On Tue, Sep 09, 2014 at 03:53:43PM +0100, Peter Maydell wrote:
>> The pl011 and pl031 devices both use level triggered interrupts,
>> but the device tree we construct was incorrectly telling the
>> kernel to configure the GIC to treat them as edge triggered.
>> This meant that output from the pl011 would hang after a while.
>>
>> Signed-off-by: Peter Maydell 
>> Cc: qemu-sta...@nongnu.org
>> ---
>> Thanks to Christoffer Dall for figuring out the cause of the hangs here.
>>
>>  hw/arm/virt.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index e8f231e..1b343f0 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -371,7 +371,7 @@ static void create_uart(const VirtBoardInfo *vbi, 
>> qemu_irq *pic)
>>   2, base, 2, size);
>>  qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
>> GIC_FDT_IRQ_TYPE_SPI, irq,
>> -   GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
>> +   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
>>  qemu_fdt_setprop_cells(vbi->fdt, nodename, "clocks",
>> vbi->clock_phandle, vbi->clock_phandle);
>>  qemu_fdt_setprop(vbi->fdt, nodename, "clock-names",
>> @@ -398,7 +398,7 @@ static void create_rtc(const VirtBoardInfo *vbi, 
>> qemu_irq *pic)
>>   2, base, 2, size);
>>  qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
>> GIC_FDT_IRQ_TYPE_SPI, irq,
>> -   GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
>> +   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
>>  qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
>>  qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
>>  g_free(nodename);
>> --
>> 1.9.1
>>
> I've been trying to figure out why we would see this particular hang
> with SMP and not UP (or maybe it is just very much more unlikely to
> happen with UP), but haven't been able to come up with a sequence of
> events to support this yet.  It also worries me that we weren't seeing
> this with KVM, since it indicates that we're either doing something
> wrong in the KVM or QEMU GIC emulation code, potentially.
>
> In any case, this patch is correct, so:
>
> Acked-by: Christoffer Dall 

I am still seeing lockups and self-detected stalls even with this
patch, and sometimes just pressing a key in the console will get
things moving again.
So I am not yet convinced that this fixes something fundamental,
rather than just working around it by taking an alternative code path
through the kernel which doesn't trigger the same root bug.

Re: [Qemu-devel] [PATCH 00/10] x86: migrate more data

2014-09-10 Thread Paolo Bonzini

Il 09/09/2014 14:29, Paolo Bonzini ha scritto:
> Juan, David, Amit, here are Pavel's fixes for x86 migration.
> Please help applying them, or ack them so that I can merge
> them through the KVM tree.
> 
> Thanks,
> 
> Paolo
> 
> Paolo Bonzini (1):
>   vl: use QLIST_FOREACH_SAFE to visit change state handlers
> 
> Pavel Dovgalyuk (9):
>   apic_common: vapic_paddr synchronization fix
>   cpu: init vmstate for ticks and clock offset
>   pcspk: adding vmstate for save/restore
>   fdc: adding vmstate for save/restore
>   parallel: adding vmstate for save/restore
>   serial: fixing vmstate for save/restore
>   pckbd: adding new fields to vmstate
>   piix: do not raise irq while loading vmstate
>   mc146818rtc: add missed field to vmstate
> 
>  cpus.c |   8 +-
>  hw/audio/pcspk.c   |  16 ++-
>  hw/block/fdc.c |  74 ++
>  hw/char/parallel.c |  18 
>  hw/char/serial.c   | 265 
> -
>  hw/i386/kvmvapic.c |  37 +--
>  hw/input/pckbd.c   |  51 ++
>  hw/pci-host/piix.c |  26 -
>  hw/timer/mc146818rtc.c |  25 +
>  include/qemu-common.h  |   2 +
>  vl.c   |   5 +-
>  12 files changed, 463 insertions(+), 64 deletions(-)
> 

I've applied all patches except 4 to the uq/master branch.  Patch 4
deserves more discussion to see what to do about older machine types
(spoiler: my idea is "nothing" :)).

Paolo

Re: [Qemu-devel] [PATCH 00/10] x86: migrate more data

2014-09-10 Thread Michael S. Tsirkin

On Wed, Sep 10, 2014 at 12:50:35PM +0200, Paolo Bonzini wrote:
> Il 09/09/2014 14:29, Paolo Bonzini ha scritto:
> > Juan, David, Amit, here are Pavel's fixes for x86 migration.
> > Please help applying them, or ack them so that I can merge
> > them through the KVM tree.
> > 
> > Thanks,
> > 
> > Paolo
> > 
> > Paolo Bonzini (1):
> >   vl: use QLIST_FOREACH_SAFE to visit change state handlers
> > 
> > Pavel Dovgalyuk (9):
> >   apic_common: vapic_paddr synchronization fix
> >   cpu: init vmstate for ticks and clock offset
> >   pcspk: adding vmstate for save/restore
> >   fdc: adding vmstate for save/restore
> >   parallel: adding vmstate for save/restore
> >   serial: fixing vmstate for save/restore
> >   pckbd: adding new fields to vmstate
> >   piix: do not raise irq while loading vmstate
> >   mc146818rtc: add missed field to vmstate
> > 
> >  cpus.c |   8 +-
> >  hw/audio/pcspk.c   |  16 ++-
> >  hw/block/fdc.c |  74 ++
> >  hw/char/parallel.c |  18 
> >  hw/char/serial.c   | 265 
> > -
> >  hw/i386/kvmvapic.c |  37 +--
> >  hw/input/pckbd.c   |  51 ++
> >  hw/pci-host/piix.c |  26 -
> >  hw/timer/mc146818rtc.c |  25 +
> >  include/qemu-common.h  |   2 +
> >  vl.c   |   5 +-
> >  12 files changed, 463 insertions(+), 64 deletions(-)
> > 
> 
> I've applied all patches except 4 to the uq/master branch.  Patch 4
> deserves more discussion to see what to do about older machine types
> (spoiler: my idea is "nothing" :)).
> 
> Paolo

9/10 with a tweak to commit log/comments?

Re: [Qemu-devel] [RFC v1 2/6] stm32f205_USART: Add the stm32f205 SoC USART Controller

2014-09-10 Thread Alistair Francis

On Tue, Sep 9, 2014 at 11:21 PM, Peter Crosthwaite
 wrote:
> On Tue, Sep 9, 2014 at 6:24 PM, Alistair Francis  wrote:
>> This patch adds the stm32f205 USART controller
>> (UART also uses the same controller).
>>
>> Signed-off-by: Alistair Francis 
>> ---
>> V2:
>>  - Small changes thanks to Peter C
>>  - Rename for the Netduino 2 and its SoC
>>
>>  hw/char/Makefile.objs |   1 +
>>  hw/char/stm32f205_usart.c | 205 
>> ++
>>  include/hw/char/stm32f205_usart.h |  64 
>>  3 files changed, 270 insertions(+)
>>  create mode 100644 hw/char/stm32f205_usart.c
>>  create mode 100644 include/hw/char/stm32f205_usart.h
>>
>> diff --git a/hw/char/Makefile.objs b/hw/char/Makefile.objs
>> index 317385d..b1f7e80 100644
>> --- a/hw/char/Makefile.objs
>> +++ b/hw/char/Makefile.objs
>> @@ -15,6 +15,7 @@ obj-$(CONFIG_OMAP) += omap_uart.o
>>  obj-$(CONFIG_SH4) += sh_serial.o
>>  obj-$(CONFIG_PSERIES) += spapr_vty.o
>>  obj-$(CONFIG_DIGIC) += digic-uart.o
>> +obj-$(CONFIG_NETDUINO2) += stm32f205_usart.o
>>
>>  common-obj-$(CONFIG_ETRAXFS) += etraxfs_ser.o
>>  common-obj-$(CONFIG_ISA_DEBUG) += debugcon.o
>> diff --git a/hw/char/stm32f205_usart.c b/hw/char/stm32f205_usart.c
>> new file mode 100644
>> index 000..c042b4b
>> --- /dev/null
>> +++ b/hw/char/stm32f205_usart.c
>> @@ -0,0 +1,205 @@
>> +/*
>> + * STM32F205xx USART
>> + *
>> + * Copyright (c) 2014 Alistair Francis 
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a 
>> copy
>> + * of this software and associated documentation files (the "Software"), to 
>> deal
>> + * in the Software without restriction, including without limitation the 
>> rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included 
>> in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
>> FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "hw/char/stm32f205_usart.h"
>> +
>> +#ifndef STM_USART_ERR_DEBUG
>> +#define STM_USART_ERR_DEBUG 0
>> +#endif
>> +
>> +#define DB_PRINT_L(lvl, fmt, args...) do { \
>> +if (STM_USART_ERR_DEBUG >= lvl) { \
>> +fprintf(stderr, "stm32f205xx_usart: %s:" fmt, __func__, ## args); \
>> +} \
>> +} while (0);
>> +
>> +#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args)
>> +
>> +static int usart_can_receive(void *opaque)
>> +{
>> +Stm32f205UsartState *s = opaque;
>> +
>> +if (s->usart_cr1 & USART_CR1_UE && s->usart_cr1 & USART_CR1_TE) {
>> +return 1;
>> +}
>
> So it's usual to block a UART on the fifo filling rather than the
> master enable switches. Corking the fifo on the master enable means
> QEMU will buffer UART input long-term until the guest turns the fifo
> on, where in reality the hardware should just drop the chars. We
> should do something similar.
>
> The reason (as far as I know anyways) for can_receive and giving qemu
> serial a false closed-loop implementation (network has it too) is to
> deal with large amounts of instantaneous data in situations where the
> real-time delays on real hardware would give natural bandwidth
> limiting. Thus the main use of a can_receive 0-return is 'you're pumping
> serial data way too fast such that my fifo is full and real hardware
> would never go that fast'.

Ok, that makes sense. I can add blocking based on the USART_SR_RXNE bit.

>
> Also, did you mean the RE bit rather than the TE bit?

Yes, good spot

>
>> +
>> +return 0;
>> +}
>> +
>> +static void usart_receive(void *opaque, const uint8_t *buf, int size)
>> +{
>> +Stm32f205UsartState *s = opaque;
>> +
>> +s->usart_dr = *buf;
>> +
>> +s->usart_sr |= USART_SR_RXNE;
>> +
>
> This might be a good condition to block can_receive on - the
> USART_SR_RXNE. And if the enables are off, just drop the char.
>
>> +if (s->usart_cr1 & USART_CR1_RXNEIE) {
>> +qemu_set_irq(s->irq, 1);
>> +}
>> +
>> +DB_PRINT("Receiving: %c\n", s->usart_dr);
>> +}
>> +
>> +static void usart_reset(DeviceState *dev)
>
> stm32f..._ prefix to function name. For consistency and makes gdb
> breakpoints slightly easier if function names are globally unique.

Yep, will fix in all patches

>
>> +{
>> +Stm32f205UsartState *s = STM32F205xx_USART(dev);
>> +
>> +s->usart_sr = 0x00C0;
>>

1 2 3 4 >

1 - 100 of 341 matches

Mail list logo