[Xen-devel] [PATCH v3] x86: also allow REP STOS emulation acceleration
While the REP MOVS acceleration appears to have helped qemu-traditional based guests, qemu-upstream (or really the respective video BIOSes) doesn't appear to benefit from that. Instead the acceleration added here provides a visible performance improvement during very early HVM guest boot. Signed-off-by: Jan Beulich --- v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos() Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew. v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by Andrew. Add output operand telling the compiler that "buf" is being written. --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard( return X86EMUL_OKAY; } +static int hvmemul_rep_stos_discard( +void *p_data, +enum x86_segment seg, +unsigned long offset, +unsigned int bytes_per_rep, +unsigned long *reps, +struct x86_emulate_ctxt *ctxt) +{ +return X86EMUL_OKAY; +} + static int hvmemul_rep_outs_discard( enum x86_segment src_seg, unsigned long src_offset, @@ -982,6 +993,113 @@ static int hvmemul_rep_movs( return X86EMUL_OKAY; } +static int hvmemul_rep_stos( +void *p_data, +enum x86_segment seg, +unsigned long offset, +unsigned int bytes_per_rep, +unsigned long *reps, +struct x86_emulate_ctxt *ctxt) +{ +struct hvm_emulate_ctxt *hvmemul_ctxt = +container_of(ctxt, struct hvm_emulate_ctxt, ctxt); +unsigned long addr; +paddr_t gpa; +p2m_type_t p2mt; +bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF); +int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps, + hvm_access_write, hvmemul_ctxt, &addr); + +if ( rc == X86EMUL_OKAY ) +{ +uint32_t pfec = PFEC_page_present | PFEC_write_access; + +if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 ) +pfec |= PFEC_user_mode; + +rc = hvmemul_linear_to_phys( +addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt); +} +if ( rc != X86EMUL_OKAY ) +return rc; + +/* Check for MMIO op */ +(void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt); + +switch ( p2mt ) +{ +unsigned long bytes; +void *buf; + +default: +/* Allocate temporary buffer. */ +for ( ; ; ) +{ +bytes = *reps * bytes_per_rep; +buf = xmalloc_bytes(bytes); +if ( buf || *reps <= 1 ) +break; +*reps >>= 1; +} + +if ( !buf ) +buf = p_data; +else +switch ( bytes_per_rep ) +{ +unsigned long dummy; + +#define CASE(bits, suffix) \ +case (bits) / 8: \ +asm ( "rep stos" #suffix \ + : "=m" (*(char (*)[bytes])buf), \ +"=D" (dummy), "=c" (dummy) \ + : "a" (*(const uint##bits##_t *)p_data), \ + "1" (buf), "2" (*reps) ); \ +break +CASE(8, b); +CASE(16, w); +CASE(32, l); +CASE(64, q); +#undef CASE + +default: +ASSERT_UNREACHABLE(); +xfree(buf); +return X86EMUL_UNHANDLEABLE; +} + +/* Adjust address for reverse store. */ +if ( df ) +gpa -= bytes - bytes_per_rep; + +rc = hvm_copy_to_guest_phys(gpa, buf, bytes); + +if ( buf != p_data ) +xfree(buf); + +switch ( rc ) +{ +case HVMCOPY_gfn_paged_out: +case HVMCOPY_gfn_shared: +return X86EMUL_RETRY; +case HVMCOPY_okay: +return X86EMUL_OKAY; +} + +gdprintk(XENLOG_WARNING, + "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n", + gpa, *reps, bytes_per_rep); +/* fall through */ +case p2m_mmio_direct: +return X86EMUL_UNHANDLEABLE; + +case p2m_mmio_dm: +return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df, + p_data); +} +} + static int hvmemul_read_segment( enum x86_segment seg, struct segment_register *reg, @@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_ .rep_ins = hvmemul_rep_ins, .rep_outs = hvmemul_rep_outs, .rep_movs = hvmemul_rep_movs, +.rep_stos = hvmemul_rep_stos, .read_segment = hvmemul_read_segment, .write_segment = hvmemul_write_segment, .read_io = hvmemul_read_io, @@ -1264,6 +1383,7 @@ static const struct x86_emulate_ops hvm_ .rep_ins = hvmemul_rep_ins_discard, .rep_outs = hvmemul_rep_outs_discard, .rep_mo
[Xen-devel] [PATCH v2] x86/HVM: make hvm_efer_valid() honor guest features
Following the earlier similar change validating CR4 modifications. Signed-off-by: Jan Beulich --- v2: consider CR0.PG during restore when checking EFER.LMA --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma return 0; } -static bool_t hvm_efer_valid(struct domain *d, - uint64_t value, uint64_t efer_validbits) +static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value, + signed int cr0_pg) { -if ( nestedhvm_enabled(d) && cpu_has_svm ) -efer_validbits |= EFER_SVME; +unsigned int ext1_ecx = 0, ext1_edx = 0; -return !((value & ~efer_validbits) || - ((sizeof(long) != 8) && (value & EFER_LME)) || - (!cpu_has_svm && (value & EFER_SVME)) || - (!cpu_has_nx && (value & EFER_NX)) || - (!cpu_has_syscall && (value & EFER_SCE)) || - (!cpu_has_lmsl && (value & EFER_LMSLE)) || - (!cpu_has_ffxsr && (value & EFER_FFXSE)) || - ((value & (EFER_LME|EFER_LMA)) == EFER_LMA)); +if ( cr0_pg < 0 && !is_hardware_domain(v->domain) ) +{ +unsigned int level; + +ASSERT(v == current); +hvm_cpuid(0x8000, &level, NULL, NULL, NULL); +if ( level >= 0x8001 ) +hvm_cpuid(0x8001, NULL, NULL, &ext1_ecx, &ext1_edx); +} +else +{ +ext1_edx = boot_cpu_data.x86_capability[X86_FEATURE_LM / 32]; +ext1_ecx = boot_cpu_data.x86_capability[X86_FEATURE_SVM / 32]; +} + +if ( (value & EFER_SCE) && + !(ext1_edx & cpufeat_mask(X86_FEATURE_SYSCALL)) ) +return 0; + +if ( (value & (EFER_LME | EFER_LMA)) && + !(ext1_edx & cpufeat_mask(X86_FEATURE_LM)) ) +return 0; + +if ( cr0_pg > 0 && (value & EFER_LMA) && (!(value & EFER_LME) || !cr0_pg) ) +return 0; + +if ( (value & EFER_NX) && !(ext1_edx & cpufeat_mask(X86_FEATURE_NX)) ) +return 0; + +if ( (value & EFER_SVME) && + (!(ext1_ecx & cpufeat_mask(X86_FEATURE_SVM)) || + !nestedhvm_enabled(v->domain)) ) +return 0; + +if ( (value & EFER_LMSLE) && !cpu_has_lmsl ) +return 0; + +if ( (value & EFER_FFXSE) && + !(ext1_edx & cpufeat_mask(X86_FEATURE_FFXSR)) ) +return 0; + +return 1; } /* These reserved bits in lower 32 remain 0 after any load of CR0 */ @@ -1763,7 +1796,6 @@ static int hvm_load_cpu_ctxt(struct doma struct vcpu *v; struct hvm_hw_cpu ctxt; struct segment_register seg; -uint64_t efer_validbits; /* Which vcpu is this? */ vcpuid = hvm_load_instance(h); @@ -1794,9 +1826,7 @@ static int hvm_load_cpu_ctxt(struct doma return -EINVAL; } -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_LMA - | EFER_NX | EFER_SCE; -if ( !hvm_efer_valid(d, ctxt.msr_efer, efer_validbits) ) +if ( !hvm_efer_valid(v, ctxt.msr_efer, MASK_EXTR(ctxt.cr0, X86_CR0_PG)) ) { printk(XENLOG_G_ERR "HVM%d restore: bad EFER %#" PRIx64 "\n", d->domain_id, ctxt.msr_efer); @@ -2936,12 +2966,10 @@ err: int hvm_set_efer(uint64_t value) { struct vcpu *v = current; -uint64_t efer_validbits; value &= ~EFER_LMA; -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_NX | EFER_SCE; -if ( !hvm_efer_valid(v->domain, value, efer_validbits) ) +if ( !hvm_efer_valid(v, value, -1) ) { gdprintk(XENLOG_WARNING, "Trying to set reserved bit in " "EFER: %#"PRIx64"\n", value); x86/HVM: make hvm_efer_valid() honor guest features Following the earlier similar change validating CR4 modifications. Signed-off-by: Jan Beulich --- v2: consider CR0.PG during restore when checking EFER.LMA --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma return 0; } -static bool_t hvm_efer_valid(struct domain *d, - uint64_t value, uint64_t efer_validbits) +static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value, + signed int cr0_pg) { -if ( nestedhvm_enabled(d) && cpu_has_svm ) -efer_validbits |= EFER_SVME; +unsigned int ext1_ecx = 0, ext1_edx = 0; -return !((value & ~efer_validbits) || - ((sizeof(long) != 8) && (value & EFER_LME)) || - (!cpu_has_svm && (value & EFER_SVME)) || - (!cpu_has_nx && (value & EFER_NX)) || - (!cpu_has_syscall && (value & EFER_SCE)) || - (!cpu_has_lmsl && (value & EFER_LMSLE)) || - (!cpu_has_ffxsr && (value & EFER_FFXSE)) || - ((value & (EFER_LME|EFER_LMA)) == EFER_LMA)); +if ( cr0_pg < 0 && !is_hardware_domain(v->domain) ) +{ +unsigned int level; + +ASSERT(v == current); +hvm_cpuid(0x8000, &level, NULL, NULL, NULL); +if (
[Xen-devel] [PATCH] x86: xen: mmu: Remove unused function
Remove the function set_pte_mfn() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist --- arch/x86/xen/mmu.c |9 - arch/x86/xen/mmu.h |2 -- 2 files changed, 11 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a8a1a3d..6959550 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -281,15 +281,6 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val) xen_set_pmd_hyper(ptr, val); } -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) -{ - set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); -} - static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) { struct mmu_update u; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 73809bb..f2dcf79 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -13,8 +13,6 @@ enum pt_level { bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); -void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] common/memory: fix an XSM error path
XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap return the extent at which failure was detected, not error indicators. Signed-off-by: Jan Beulich --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN return start_extent; args.domain = d; -rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d); -if ( rc ) +if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) ) { rcu_unlock_domain(d); -return rc; +return start_extent; } switch ( op ) common/memory: fix an XSM error path XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap return the extent at which failure was detected, not error indicators. Signed-off-by: Jan Beulich --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN return start_extent; args.domain = d; -rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d); -if ( rc ) +if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) ) { rcu_unlock_domain(d); -return rc; +return start_extent; } switch ( op ) ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation
While for us it's not as bad as it was for Linux, their commit 13e457e0ee ("KVM: x86: Emulator does not decode clflush well", by Nadav Amit ) nevertheless points out two shortcomings in our code: opcode 0F AE /7 is clflush only when it uses a memory mode (otherwise it's SFENCE) and when there's no REP prefix (an operand size prefix is fine, as that's CLFLUSHOPT). Signed-off-by: Jan Beulich --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -4400,7 +4400,9 @@ x86_emulate( case 0xae: /* Grp15 */ switch ( modrm_reg & 7 ) { -case 7: /* clflush */ +case 7: /* clflush{,opt} */ +fail_if(modrm_mod == 3); +fail_if(rep_prefix()); fail_if(ops->wbinvd == NULL); if ( (rc = ops->wbinvd(ctxt)) != 0 ) goto done; x86emul: tighten CLFLUSH emulation While for us it's not as bad as it was for Linux, their commit 13e457e0ee ("KVM: x86: Emulator does not decode clflush well", by Nadav Amit ) nevertheless points out two shortcomings in our code: opcode 0F AE /7 is clflush only when it uses a memory mode (otherwise it's SFENCE) and when there's no REP prefix (an operand size prefix is fine, as that's CLFLUSHOPT). Signed-off-by: Jan Beulich --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -4400,7 +4400,9 @@ x86_emulate( case 0xae: /* Grp15 */ switch ( modrm_reg & 7 ) { -case 7: /* clflush */ +case 7: /* clflush{,opt} */ +fail_if(modrm_mod == 3); +fail_if(rep_prefix()); fail_if(ops->wbinvd == NULL); if ( (rc = ops->wbinvd(ctxt)) != 0 ) goto done; ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] x86: xen: mmu: Remove unused function
On 01/11/2015 11:35 PM, Rickard Strandqvist wrote: Remove the function set_pte_mfn() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Sorry, you seem not to have checked the newest kernel. Used by: xen_do_set_identity_and_remap_chunk() xen_remap_memory() So: Nak. Juergen Signed-off-by: Rickard Strandqvist --- arch/x86/xen/mmu.c |9 - arch/x86/xen/mmu.h |2 -- 2 files changed, 11 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a8a1a3d..6959550 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -281,15 +281,6 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val) xen_set_pmd_hyper(ptr, val); } -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) -{ - set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); -} - static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) { struct mmu_update u; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 73809bb..f2dcf79 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -13,8 +13,6 @@ enum pt_level { bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); -void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold
We've had reports of systems where CMCIs would surface at a relatively high rate during certain periods of time, without them apparently causing subsequent more severe problems (see Xeon E7-8800/4800/2800 specification clarification SC1). Give the admin a knob to lower the impact on the system logs. Signed-off-by: Jan Beulich --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -242,6 +242,14 @@ the NMI watchdog is also enabled. If set, override Xen's default choice for the platform timer. +### cmci-threshold +> `= ` + +> Default: `2` + +Specify the event count threshold for raising Corrected Machine Check +Interrupts. Specifying zero disables CMCI handling. + ### cmos-rtc-probe > `= ` --- a/xen/arch/x86/cpu/mcheck/mce_intel.c +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -492,6 +492,9 @@ static int do_cmci_discover(int i) { unsigned msr = MSR_IA32_MCx_CTL2(i); u64 val; +unsigned int threshold, max_threshold; +static unsigned int cmci_threshold = 2; +integer_param("cmci-threshold", cmci_threshold); rdmsrl(msr, val); /* Some other CPU already owns this bank. */ @@ -500,15 +503,28 @@ static int do_cmci_discover(int i) goto out; } -val &= ~CMCI_THRESHOLD_MASK; -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); -rdmsrl(msr, val); +if ( cmci_threshold ) +{ +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK); +rdmsrl(msr, val); +} if (!(val & CMCI_EN)) { /* This bank does not support CMCI. Polling timer has to handle it. */ mcabanks_set(i, __get_cpu_var(no_cmci_banks)); +wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK); return 0; } +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK); +threshold = cmci_threshold; +if ( threshold > max_threshold ) +{ + mce_printk(MCE_QUIET, + "CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n", + threshold, smp_processor_id(), i, max_threshold); + threshold = max_threshold; +} +wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold); mcabanks_set(i, __get_cpu_var(mce_banks_owned)); out: mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); --- a/xen/arch/x86/cpu/mcheck/x86_mca.h +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h @@ -86,9 +86,6 @@ /* Bitfield of MSR_K8_HWCR register */ #define K8_HWCR_MCi_STATUS_WREN(1ULL << 18) -/*Intel Specific bitfield*/ -#define CMCI_THRESHOLD 0x2 - #define MCi_MISC_ADDRMOD_MASK (0x7UL << 6) #define MCi_MISC_PHYSMOD(0x2UL << 6) x86/MCE: allow overriding the CMCI threshold We've had reports of systems where CMCIs would surface at a relatively high rate during certain periods of time, without them apparently causing subsequent more severe problems (see Xeon E7-8800/4800/2800 specification clarification SC1). Give the admin a knob to lower the impact on the system logs. Signed-off-by: Jan Beulich --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -242,6 +242,14 @@ the NMI watchdog is also enabled. If set, override Xen's default choice for the platform timer. +### cmci-threshold +> `= ` + +> Default: `2` + +Specify the event count threshold for raising Corrected Machine Check +Interrupts. Specifying zero disables CMCI handling. + ### cmos-rtc-probe > `= ` --- a/xen/arch/x86/cpu/mcheck/mce_intel.c +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -492,6 +492,9 @@ static int do_cmci_discover(int i) { unsigned msr = MSR_IA32_MCx_CTL2(i); u64 val; +unsigned int threshold, max_threshold; +static unsigned int cmci_threshold = 2; +integer_param("cmci-threshold", cmci_threshold); rdmsrl(msr, val); /* Some other CPU already owns this bank. */ @@ -500,15 +503,28 @@ static int do_cmci_discover(int i) goto out; } -val &= ~CMCI_THRESHOLD_MASK; -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); -rdmsrl(msr, val); +if ( cmci_threshold ) +{ +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK); +rdmsrl(msr, val); +} if (!(val & CMCI_EN)) { /* This bank does not support CMCI. Polling timer has to handle it. */ mcabanks_set(i, __get_cpu_var(no_cmci_banks)); +wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK); return 0; } +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK); +threshold = cmci_threshold; +if ( threshold > max_threshold ) +{ + mce_printk(MCE_QUIET, + "CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n", + threshold, smp_processor_id(), i, max_threshold); + threshold = max_threshold; +} +wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold); mcabanks_set(i, __get_cpu_var(mce_banks_owned)); out: mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); --- a/xen/arch/x86/cpu/mcheck/x86_mca.h +++ b/xen/arch/x
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Friday, January 09, 2015 6:35 PM > > >>> On 09.01.15 at 11:10, wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Boot time device assignment is different: The question isn't whether > >> an assigned device works, instead the proper analogy is whether a > >> device is _present_. If a device doesn't work on bare metal, it will > >> still be discoverable. Yet if device assignment fails, that's not going > >> to be the case - for security reasons, the guest would not see any > >> notion of the device. > > > > the question is whether we want such device assignment fail due to > > RMRR confliction, and the fail decision should be when Xen handles > > actual assignment instead of when domain builder prepares reserved > > regions. > > Detecting the failure only in the hypervisor has the downside of > potentially leaving the user with little clues as to what went wrong. > Sending messages to the hypervisor log in that case is > questionable, yet the tool stack (namely libxc) is known to not > always do a good job in error propagation. > > >> The question isn't about migrating with devices assigned, but about > >> assigning devices after migration (consider a dual vif + SR-IOV NIC > >> guest setup where the SR-IOV NIC gets hot-removed before > >> migration and a new one hot-plugged afterwards). > >> > >> Furthermore any tying of the guest memory layout to the host's > >> where the guest first boots is awkward, as post-migration there's > >> not going to be any reliable correlation between the guest layout > >> and the new host's. > > > > how can you solve this? like above example, a NIC on node-A leaves > > a reserved region in guest e820. now it's hot-removed and then > > migrated to node-b. there's no way to update e820 again since it's > > only boot structure. then user will still see such awkward regions. > > since it's not avoidable, report-all in the summary mail looks not > > causing a new problem. > > The solution to this are reserved regions specified in the guest config, > independent of host characteristics. > I don't think how reserved regions are specified matter here. My point is that when a region is reserved in e820 at boot time, there's no way to erase that knowledge in the guest even when devices causing that reservation are hot removed later. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] preparing for 4.4.2 / 4.3.4
On Thu, Jan 08, Jan Beulich wrote: > now that 4.5 is almost out the door, I'd like to get stable releases > prepared on the other two active branches. 4.3.4 is expected to be > the last xen.org managed release on the 4.3 branch. Aiming at RC1s > some time next week, please indicate commits you consider relevant > to backport but find missing from the respective branches. e86539a388314cd3dca88f5e69d7873343197cd8 ("libxc: check return values on mmap() and madvise() on xc_alloc_hypercall_buffer()") Olaf ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [linux-3.14 test] 33341: tolerable FAIL - PUSHED
flight 33341 linux-3.14 real [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/33341/ Failures :-/ but no regressions. Regressions which are regarded as allowable (not blocking): test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail like 32448 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-pvh-intel 9 guest-start fail never pass test-armhf-armhf-libvirt 9 guest-start fail never pass test-amd64-i386-libvirt 9 guest-start fail never pass test-amd64-amd64-xl-pvh-amd 9 guest-start fail never pass test-armhf-armhf-xl 10 migrate-support-checkfail never pass test-amd64-amd64-xl-pcipt-intel 9 guest-start fail never pass test-amd64-amd64-libvirt 9 guest-start fail never pass test-amd64-i386-xl-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3 14 guest-stop fail never pass test-amd64-amd64-xl-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-amd64-xl-winxpsp3 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass version targeted for testing: linuxc3b70f0bbb6a883f4afa639286043d3f71fbbddf baseline version: linux83a926f7a4e39fb6be0576024e67fe161593defa People who touched revisions under test: "Eric W. Biederman" Andreas Müller Andrew Lunn Andrew Morton Andy Lutomirski Baruch Siach Catalin Marinas Cedric Bosdonnat Chris Mason Christoph Hellwig Dan Carpenter Darrick J. Wong Dmitry Eremin-Solenikov Dmitry Osipenko Eric W. Biederman Filipe Manana Greg Kroah-Hartman H. Peter Anvin Hannes Reinecke Herbert Xu Ingo Molnar Jaehoon Chung James Hogan Jan Kara Jason Cooper Joe Thornber Johannes Berg Josef Bacik Kashyap Desai Lee Jones Linus Torvalds Luis Henriques Michael Halcrow Mike Snitzer Mikulas Patocka Milan Broz Mimi Zohar NeilBrown Oleg Nesterov Paolo Bonzini Paul Moore Peng Tao Peter Guo Rabin Vincent Richard Guy Briggs Richard Weinberger Sumit Saxena sumit.sax...@avagotech.com Takashi Iwai Thierry Reding Thomas Gleixner Trond Myklebust Tyler Hicks Ulf Hansson Uwe Kleine-König Will Deacon Zhang Rui jobs: build-amd64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-armhf-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-armhf-pvopspass build-i386-pvops pass build-amd64-rumpuserxen pass build-i386-rumpuserxen pass test-amd64-amd64-xl pass test-armhf-armhf-xl pass test-amd64-i386-xl pass test-amd64-amd64-xl-pvh-amd fail test-amd64-i386-rhel6hvm-amd pass test-amd64-i386-qemut-rhel6hvm-amd pass test-amd64-i386-qemuu-rhel6hvm-amd pass test-amd64-amd64-xl-qemut-debianhvm-amd64pass test-amd64-i386-xl-qemut-debianhvm-amd64 pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-i386-xl-qemuu-debianhvm-amd64 pass test-amd64-i386-freebsd10-amd64 pass test-amd64-amd64-xl-qemuu-ovmf-amd64
[Xen-devel] [PATCH] evtchn: simplify sending of notifications
The trivial wrapper evtchn_set_pending() is pretty pointless, as it only serves to invoke another wrapper evtchn_port_set_pending(). In turn, the latter is kind of inconsistent with its siblings in that is takes a struct vcpu * rather than a struct domain * - adjusting this allows for more efficient code in the majority of cases. Signed-off-by: Jan Beulich --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -95,8 +95,6 @@ static uint8_t get_xen_consumer(xen_even /* Get the notification function for a given Xen-bound event channel. */ #define xen_notification_fn(e) (xen_consumers[(e)->xen_consumer-1]) -static void evtchn_set_pending(struct vcpu *v, int port); - static int virq_is_global(uint32_t virq) { int rc; @@ -287,7 +285,7 @@ static long evtchn_bind_interdomain(evtc * We may have lost notifications on the remote unbound port. Fix that up * here by conservatively always setting a notification on the local port. */ -evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport); +evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn); bind->local_port = lport; @@ -599,11 +597,10 @@ static long evtchn_close(evtchn_close_t return __evtchn_close(current->domain, close->port); } -int evtchn_send(struct domain *d, unsigned int lport) +int evtchn_send(struct domain *ld, unsigned int lport) { struct evtchn *lchn, *rchn; -struct domain *ld = d, *rd; -struct vcpu *rvcpu; +struct domain *rd; intrport, ret = 0; spin_lock(&ld->event_lock); @@ -633,14 +630,13 @@ int evtchn_send(struct domain *d, unsign rd= lchn->u.interdomain.remote_dom; rport = lchn->u.interdomain.remote_port; rchn = evtchn_from_port(rd, rport); -rvcpu = rd->vcpu[rchn->notify_vcpu_id]; if ( consumer_is_xen(rchn) ) -(*xen_notification_fn(rchn))(rvcpu, rport); +xen_notification_fn(rchn)(rd->vcpu[rchn->notify_vcpu_id], rport); else -evtchn_set_pending(rvcpu, rport); +evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn); break; case ECS_IPI: -evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport); +evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn); break; case ECS_UNBOUND: /* silently drop the notification */ @@ -655,11 +651,6 @@ out: return ret; } -static void evtchn_set_pending(struct vcpu *v, int port) -{ -evtchn_port_set_pending(v, evtchn_from_port(v->domain, port)); -} - int guest_enabled_event(struct vcpu *v, uint32_t virq) { return ((v != NULL) && (v->virq_to_evtchn[virq] != 0)); @@ -669,6 +660,7 @@ void send_guest_vcpu_virq(struct vcpu *v { unsigned long flags; int port; +struct domain *d; ASSERT(!virq_is_global(virq)); @@ -678,7 +670,8 @@ void send_guest_vcpu_virq(struct vcpu *v if ( unlikely(port == 0) ) goto out; -evtchn_set_pending(v, port); +d = v->domain; +evtchn_port_set_pending(d, v->vcpu_id, evtchn_from_port(d, port)); out: spin_unlock_irqrestore(&v->virq_lock, flags); @@ -707,7 +700,7 @@ static void send_guest_global_virq(struc goto out; chn = evtchn_from_port(d, port); -evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port); +evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); out: spin_unlock_irqrestore(&v->virq_lock, flags); @@ -731,7 +724,7 @@ void send_guest_pirq(struct domain *d, c } chn = evtchn_from_port(d, port); -evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port); +evtchn_port_set_pending(d, chn->notify_vcpu_id, chn); } static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly; @@ -1202,7 +1195,6 @@ void notify_via_xen_event_channel(struct { struct evtchn *lchn, *rchn; struct domain *rd; -intrport; spin_lock(&ld->event_lock); @@ -1219,9 +1211,8 @@ void notify_via_xen_event_channel(struct if ( likely(lchn->state == ECS_INTERDOMAIN) ) { rd= lchn->u.interdomain.remote_dom; -rport = lchn->u.interdomain.remote_port; -rchn = evtchn_from_port(rd, rport); -evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport); +rchn = evtchn_from_port(rd, lchn->u.interdomain.remote_port); +evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn); } spin_unlock(&ld->event_lock); --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru d->evtchn_port_ops->init(d, evtchn); } -static inline void evtchn_port_set_pending(struct vcpu *v, +static inline void evtchn_port_set_pending(struct domain *d, + unsigned int vcpu_id, struct evtchn *evtchn) { -v->domain->evtchn_port_ops->set_pending(v, evtchn); +d->evtchn_port_ops->set_pending(d->vcpu[vcpu_
[Xen-devel] [PATCH] arm64/EFI: minor corrections
- don't bail when using the last slot of bootinfo.mem.bank[] (due to premature incrementing of the array index) - GUIDs should be static const (and placed into .init.* whenever possible) - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly append one to the message passed to the function Signed-off-by: Jan Beulich --- a/xen/arch/arm/efi/efi-boot.h +++ b/xen/arch/arm/efi/efi-boot.h @@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt, static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table) { -const EFI_GUID fdt_guid = DEVICE_TREE_GUID; +static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID; EFI_CONFIGURATION_TABLE *tables; void *fdt = NULL; int i; @@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem || desc_ptr->Type == EfiBootServicesCode || desc_ptr->Type == EfiBootServicesData ) { -bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart; -bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * EFI_PAGE_SIZE; -if ( ++i >= NR_MEM_BANKS ) +if ( i >= NR_MEM_BANKS ) { -PrintStr(L"Warning: All "); -DisplayUint(NR_MEM_BANKS, -1); -PrintStr(L" bootinfo mem banks exhausted.\r\n"); +PrintStr(L"Warning: All " __stringify(NR_MEM_BANKS) + " bootinfo mem banks exhausted.\r\n"); break; } +bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart; +bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * EFI_PAGE_SIZE; +++i; } desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size); } @@ -334,7 +334,7 @@ static void __init efi_arch_process_memo status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size, desc_ver); if ( EFI_ERROR(status) ) -PrintErrMesg(L"Updating FDT failed\r\n", status); +PrintErrMesg(L"Updating FDT failed", status); } static void __init efi_arch_pre_exit_boot(void) @@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli status = efi_bs->AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void **)&buf); if ( EFI_ERROR(status) ) -PrintErrMesg(L"Unable to allocate string buffer\r\n", status); +PrintErrMesg(L"Unable to allocate string buffer", status); if ( image_name ) { arm64/EFI: minor corrections - don't bail when using the last slot of bootinfo.mem.bank[] (due to premature incrementing of the array index) - GUIDs should be static const (and placed into .init.* whenever possible) - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly append one to the message passed to the function Signed-off-by: Jan Beulich --- a/xen/arch/arm/efi/efi-boot.h +++ b/xen/arch/arm/efi/efi-boot.h @@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt, static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table) { -const EFI_GUID fdt_guid = DEVICE_TREE_GUID; +static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID; EFI_CONFIGURATION_TABLE *tables; void *fdt = NULL; int i; @@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem || desc_ptr->Type == EfiBootServicesCode || desc_ptr->Type == EfiBootServicesData ) { -bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart; -bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * EFI_PAGE_SIZE; -if ( ++i >= NR_MEM_BANKS ) +if ( i >= NR_MEM_BANKS ) { -PrintStr(L"Warning: All "); -DisplayUint(NR_MEM_BANKS, -1); -PrintStr(L" bootinfo mem banks exhausted.\r\n"); +PrintStr(L"Warning: All " __stringify(NR_MEM_BANKS) + " bootinfo mem banks exhausted.\r\n"); break; } +bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart; +bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * EFI_PAGE_SIZE; +++i; } desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size); } @@ -334,7 +334,7 @@ static void __init efi_arch_process_memo status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size, desc_ver); if ( EFI_ERROR(status) ) -PrintErrMesg(L"Updating FDT failed\r\n", status); +PrintErrMesg(L"Updating FDT failed", status); } static void __init efi_arch_pre_exit_boot(void) @@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli status = efi_bs->AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void **)&buf); if ( EFI_ERROR(status) ) -PrintErrMesg(L"Unable to allocate string buffer\r\n", status); +PrintErrMesg(L"Unable to allocate string buffer", status); if ( image_name ) {
[Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects
In particular the .rodata.str2 case is relevant for the EFI boot code (moving around 3k from permanent to init-time sections). Signed-off-by: Jan Beulich --- a/xen/Rules.mk +++ b/xen/Rules.mk @@ -170,7 +170,10 @@ _clean_%/: FORCE %.o: %.S Makefile $(CC) $(AFLAGS) -c $< -o $@ -SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \ +SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \ + $(foreach w,1 2 4, \ + rodata.str$(w).$(a)) \ + rodata.cst$(a)) \ $(foreach r,rel rel.ro,data.$(r).local) $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile extend list of sections convertible to .init.* ones in init-only objects In particular the .rodata.str2 case is relevant for the EFI boot code (moving around 3k from permanent to init-time sections). Signed-off-by: Jan Beulich --- a/xen/Rules.mk +++ b/xen/Rules.mk @@ -170,7 +170,10 @@ _clean_%/: FORCE %.o: %.S Makefile $(CC) $(AFLAGS) -c $< -o $@ -SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \ +SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \ + $(foreach w,1 2 4, \ + rodata.str$(w).$(a)) \ + rodata.cst$(a)) \ $(foreach r,rel rel.ro,data.$(r).local) $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] preparing for 4.4.2 / 4.3.4
>>> On 12.01.15 at 09:47, wrote: > On Thu, Jan 08, Jan Beulich wrote: > >> now that 4.5 is almost out the door, I'd like to get stable releases >> prepared on the other two active branches. 4.3.4 is expected to be >> the last xen.org managed release on the 4.3 branch. Aiming at RC1s >> some time next week, please indicate commits you consider relevant >> to backport but find missing from the respective branches. > > e86539a388314cd3dca88f5e69d7873343197cd8 ("libxc: check return values on > mmap() and madvise() on xc_alloc_hypercall_buffer()") Tool stack backport requests should be Cc-ed to the respective maintainer (now added). Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
Il 09/01/2015 18:38, Dario Faggioli ha scritto: On Fri, 2015-01-09 at 14:42 +, Wei Liu wrote: On Thu, Jan 08, 2015 at 03:33:17PM +0100, Fabio Fantoni wrote: Sorry for the probably stupid question, what are the pros and cons of default use of phy instead qdisk for raw files as domU disk? There's no stupid question. :-) I was told that it performs better and enables other potential improvements. Not a big deal, probably, but IMO it would be nice to have at least a few words about when it's happening in the changelog, wouldn't it? I'm also interested in detailed changelog about. In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Thanks for any reply and sorry for my bad english. Regards, Dario ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Friday, January 09, 2015 6:46 PM > > >>> On 09.01.15 at 11:26, wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> >>> On 09.01.15 at 07:57, wrote: > >> > 1.1) per-device 'warn' vs. global 'warn' > >> > > >> > Both Tim/Jan prefer to 'warn' as a per-device option to the admin instead > >> > of a global option. > >> > > >> > In a glimpse a per-device 'warn' option provides more fine-grained > control > >> > than a global option, however if thinking it carefully allowing one > >> > device > >> > w/ > >> > potential problem isn't more correct or secure than allowing multiple > >> > devices w/ potential problem. Even in practice a device like USB can > >> > work bearing <1MB confliction, like Jan pointed out there's always corner > >> > cases which we might not know so as long as we open door for one > device, > >> > it implies a problematic environment to users and user's judge on > whether > >> > he can live up to this problem is not impacted by how many devices the > door > >> > is opened for (he anyway needs to study warning message and do > >> verification > >> > if choosing to live up) > >> > > >> > Regarding to that, imo if we agree to provide 'warn' option, just > >> > providing > >> > a global overriding option (definitely per-vm) is acceptable and simpler. > >> > >> If the admin determined that ignoring the RMRR requirements for one > >> devices is safe, that doesn't (and shouldn't) mean this is the case for > >> all other devices too. > > > > I don't think admin can determine whether it's 100% safe. What admin can > > decide is whether he lives up to the potential problem based on his purpose > > or based on some experiments. only device vendor knows when and how > > RMRR is used. So as long as warn is opened for one device, I think it > > already means a problem environment and then adding more device is > > just same situation. > > What if the admin consulted the device and BIOS vendors, and got > assured there's not going to be any accesses to the reserved regions > post-boot? consultancy could be still inaccurate, or man-error may happen. > > >> > 1.2) when to 'fail' > >> > > >> > There is one open whether we should fail immediately in domain builder > >> > if a confliction is detected. > >> > > >> > Jan's comment is yes, we should 'fail' the VM creation as it's an error. > >> > > >> > My previous point is more mimicking native behavior, where a device > >> > failure (in our case it's actually potential device failure since VM is > >> > not > >> > powered yet) doesn't impact user until its function is actually touched. > >> > In our case, even domain builder fails to re-arrange guest RAM to skip > >> > reserved regions, we have centralized policy (either 'fail' or 'warn' per > >> > above conclusion) in Xen hypervisor when the device is actually assigned. > >> > so a 'warn' should be fine, but my insist on this is not strong. > >> > >> See my earlier reply: Failure to add a device to me is more like a > >> device preventing a bare metal system from coming up altogether. > > > > not all devices are required for bare metal to boot. it causes problem > > only when it's being used in the boot process. say at powering up the > > disk (insert in the PCI slot) is broken (not sure whether you call such > > thing as 'failure to add a device'), it is only error when BIOS tries to > > read disk. > > Not necessarily. Any malfunctioning device touched by the BIOS, > irrespective of whether the device is needed for booting, can cause > the boot process to hang. Again, the analogy to bare metal is > device presence, not whether the device is functioning properly. > > > note device assignment path is the actual path to decide whether a > > device will be present to the guest. not at this domain build time. > > That would only make a marginal difference in time of when domain > creation fails. it's not marginal difference. instead it's about who owns the policy. to me, detect/avoid conflictions in domain builder is just a preparation for later device assignment (either deterministic static assignment or non-deterministic hotplug). As a preparation, we don't need to make a failure here as a blocker to prevent guest boot. Instead, leave the decision to where device assignment actually happens then hard requirement is made on any conflictions a.t.m. Then we just follow the existing policy of device assignment (either block guest boot, or move forward w/o presenting the device), if confliction is treat as a failure by default (w/o 'warn' override) > > >> > and another point is about hotplug. 'fail' for future devices is too > > strict, > >> > but to differentiate that from static-assigned devices, domain builder > >> > will then need maintain a per-device reserved region structure. just > >> > 'warn' makes things simple. > >> > >> Whereas here I agree - hotplug should just fail (without otherwise > >> impacting the guest). > > > > so
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: > In the meantime, I saw this: > http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html > Based on the post above seems that phy will have important risk of data > loss if I understand good, from Ian Campbell post: > > xl also uses qdisk for raw disk images instead of loop+blkback which > > xend used, because there are concerns that the loop driver can lead to > > data loss (by not implementing direct i/o the underlying device, and/or > > not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
>>> On 12.01.15 at 09:46, wrote: >> From: Jan Beulich [mailto:jbeul...@suse.com] >> Sent: Friday, January 09, 2015 6:35 PM >> >>> On 09.01.15 at 11:10, wrote: >> >> From: Jan Beulich [mailto:jbeul...@suse.com] >> >> The question isn't about migrating with devices assigned, but about >> >> assigning devices after migration (consider a dual vif + SR-IOV NIC >> >> guest setup where the SR-IOV NIC gets hot-removed before >> >> migration and a new one hot-plugged afterwards). >> >> >> >> Furthermore any tying of the guest memory layout to the host's >> >> where the guest first boots is awkward, as post-migration there's >> >> not going to be any reliable correlation between the guest layout >> >> and the new host's. >> > >> > how can you solve this? like above example, a NIC on node-A leaves >> > a reserved region in guest e820. now it's hot-removed and then >> > migrated to node-b. there's no way to update e820 again since it's >> > only boot structure. then user will still see such awkward regions. >> > since it's not avoidable, report-all in the summary mail looks not >> > causing a new problem. >> >> The solution to this are reserved regions specified in the guest config, >> independent of host characteristics. > > I don't think how reserved regions are specified matter here. My point > is that when a region is reserved in e820 at boot time, there's no way > to erase that knowledge in the guest even when devices causing that > reservation are hot removed later. I don't think anyone ever indicated that such erasure would be needed/wanted - I'm not sure how you ended up there. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
Il 12/01/2015 10:31, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. Thanks for your reply. I saw other posts about and if I understand good when O_DIRECT patches will be in upstream loop driver the data loss risk will be solved, right? ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Monday, January 12, 2015 5:33 PM > > >>> On 12.01.15 at 09:46, wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Sent: Friday, January 09, 2015 6:35 PM > >> >>> On 09.01.15 at 11:10, wrote: > >> >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> >> The question isn't about migrating with devices assigned, but about > >> >> assigning devices after migration (consider a dual vif + SR-IOV NIC > >> >> guest setup where the SR-IOV NIC gets hot-removed before > >> >> migration and a new one hot-plugged afterwards). > >> >> > >> >> Furthermore any tying of the guest memory layout to the host's > >> >> where the guest first boots is awkward, as post-migration there's > >> >> not going to be any reliable correlation between the guest layout > >> >> and the new host's. > >> > > >> > how can you solve this? like above example, a NIC on node-A leaves > >> > a reserved region in guest e820. now it's hot-removed and then > >> > migrated to node-b. there's no way to update e820 again since it's > >> > only boot structure. then user will still see such awkward regions. > >> > since it's not avoidable, report-all in the summary mail looks not > >> > causing a new problem. > >> > >> The solution to this are reserved regions specified in the guest config, > >> independent of host characteristics. > > > > I don't think how reserved regions are specified matter here. My point > > is that when a region is reserved in e820 at boot time, there's no way > > to erase that knowledge in the guest even when devices causing that > > reservation are hot removed later. > > I don't think anyone ever indicated that such erasure would be > needed/wanted - I'm not sure how you ended up there. > I ended here to indicate that report-all which gives user more reserved regions than necessary is not a weird case since above scenario can also create such fact. User shouldn't set expectation about reserved region layout. and this argument is necessary to support our proposal of using report-all. :-) Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Konrad Rzeszutek Wilk [mailto:konrad.w...@oracle.com] > Sent: Saturday, January 10, 2015 4:28 AM > > On Thu, Jan 08, 2015 at 06:02:04PM +, George Dunlap wrote: > > On Thu, Jan 8, 2015 at 4:10 PM, Jan Beulich wrote: > > On 08.01.15 at 16:59, wrote: > > >> On Thu, Jan 8, 2015 at 1:54 PM, Jan Beulich wrote: > > the 1st invocation of this interface will save all reported reserved > > regions under domain structure, and later invocation (e.g. from > > hvmloader) gets saved content. > > >>> > > >>> Why would the reserved regions need attaching to the domain > > >>> structure? The combination of (to be) assigned devices and > > >>> global RMRR list always allow reproducing the intended set of > > >>> regions without any extra storage. > > >> > > >> So when you say "(to be) assigned devices", you mean any device which > > >> is currently assigned, *or may be assigned at some point in the > > >> future*? > > > > > > Yes. > > > > > >> Do you think the extra storage for "this VM might possibly be assigned > > >> this device at some point" wouldn't really be that much bigger than > > >> "this VM might possibly map this RMRR at some point in the future"? > > > > > > Since listing devices without RMRR association would be pointless, > > > I think a list of devices would require less storage. But see below. > > > > > >> It seems a lot cleaner to me to have the toolstack tell Xen what > > >> ranges are reserved for RMRR per VM, and then have Xen check again > > >> when assigning a device to make sure that the RMRRs have already been > > >> reserved. > > > > > > With an extra level of what can be got wrong by the admin. > > > However, I now realize that doing it this way would allow > > > specifying regions not associated with any device on the host > > > the guest boots on, but associated with one on a host the guest > > > may later migrate to. > > > > I did say the toolstack, not the admin. :-) > > > > At the xl level, I envisioned a single boolean that would say, "Make > > my memory layout resemble the host system" -- so the MMIO hole would > > be the same size, and all the RMRRs would be reserved. > > Like the e820_host=1 ? :-) > so this is the extension to report-all, not just for reserved regions but for all e820 entries. :-) one thing I'm struggling here (w/ Jan in other threads) is whether reporting all reserved regions on the host can be a default setting to simplify the overall rmrr implementations, given that fact that end user shouldn't set expectation on actual reserved regions. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu
The resource behind an event channel is domain centric rather than vcpu centric, and free_xen_event_channel() only follows the vcpu's domain pointer. This change allows mem_event_disable() to avoid arbitrarily referencing d->vcpu[0] just to pass the domain. No functional change. Signed-off-by: Andrew Cooper CC: Keir Fraser CC: Jan Beulich --- xen/arch/x86/hvm/hvm.c | 12 ++-- xen/common/event_channel.c |4 +--- xen/common/mem_event.c |2 +- xen/include/xen/event.h|3 +-- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 8b06bfd..acfc032 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct hvm_ioreq_server *s, return 0; fail3: -free_xen_event_channel(v, sv->ioreq_evtchn); +free_xen_event_channel(v->domain, sv->ioreq_evtchn); fail2: spin_unlock(&s->lock); @@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct hvm_ioreq_server *s, list_del(&sv->list_entry); if ( v->vcpu_id == 0 && s->bufioreq.va != NULL ) -free_xen_event_channel(v, s->bufioreq_evtchn); +free_xen_event_channel(v->domain, s->bufioreq_evtchn); -free_xen_event_channel(v, sv->ioreq_evtchn); +free_xen_event_channel(v->domain, sv->ioreq_evtchn); xfree(sv); break; @@ -701,9 +701,9 @@ static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s) list_del(&sv->list_entry); if ( v->vcpu_id == 0 && s->bufioreq.va != NULL ) -free_xen_event_channel(v, s->bufioreq_evtchn); +free_xen_event_channel(v->domain, s->bufioreq_evtchn); -free_xen_event_channel(v, sv->ioreq_evtchn); +free_xen_event_channel(v->domain, sv->ioreq_evtchn); xfree(sv); } @@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu *v, domid_t remote_domid, /* xchg() ensures that only we call free_xen_event_channel(). */ old_port = xchg(p_port, new_port); -free_xen_event_channel(v, old_port); +free_xen_event_channel(v->domain, old_port); return 0; } diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index 7d6de54..cfe4978 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel( } -void free_xen_event_channel( -struct vcpu *local_vcpu, int port) +void free_xen_event_channel(struct domain *d, int port) { struct evtchn *chn; -struct domain *d = local_vcpu->domain; spin_lock(&d->event_lock); diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c index 16ebdb5..0cc43d7 100644 --- a/xen/common/mem_event.c +++ b/xen/common/mem_event.c @@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d, struct mem_event_domain *med) } /* Free domU's event channel and leave the other one unbound */ -free_xen_event_channel(d->vcpu[0], med->xen_port); +free_xen_event_channel(d, med->xen_port); /* Unblock all vCPUs */ for_each_vcpu ( d, v ) diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h index 88526f8..0eb1dd3 100644 --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)( int alloc_unbound_xen_event_channel( struct vcpu *local_vcpu, domid_t remote_domid, xen_event_channel_notification_t notification_fn); -void free_xen_event_channel( -struct vcpu *local_vcpu, int port); +void free_xen_event_channel(struct domain *d, int port); /* Query if event channel is in use by the guest */ int guest_enabled_event(struct vcpu *v, uint32_t virq); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] xen-ringwatch issues
On Fri, 2015-01-09 at 17:00 -0500, moftah moftah wrote: > Hi Ian, > > > OK, I have followed your suggestions and created a new patch Thanks, please submit it in the form describe in the wiki page http://wiki.xen.org/wiki/Submitting_Xen_Patches which I referenced before. In particular, a Signed-off-by is a strict requirement for licensing reasons, to indicate that you have read the DCO (which is at http://wiki.xen.org/wiki/Submitting_Xen_Patches#Signing_off_a_patch ). Please also provide a suitable changelog entry and use a unified diff (diff -u). I think rather than "[-]*" the right addition would be -? (i.e. a single optional negative sign), unless Python's re syntax is lots different to what I expect. > this new patch does fix the issue fully for the negative numbers > > I didnt run the patch on 64bit OS since xenserver has 32bit Dom0 These fields are unsigned int, .i.e. always 32-bits. So I think your "if self.conf < 0: self.cons = 4294967296 + self.cons" etc will work. The Internet also suggests "ctypes.c_uint32(i).value", dunno if that is better. In any case writing 4294967296 as 2**32 would be slightly clearer. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
>>> On 12.01.15 at 10:41, wrote: >> From: Jan Beulich [mailto:jbeul...@suse.com] >> Sent: Monday, January 12, 2015 5:33 PM >> >>> On 12.01.15 at 09:46, wrote: >> >> From: Jan Beulich [mailto:jbeul...@suse.com] >> >> Sent: Friday, January 09, 2015 6:35 PM >> >> >>> On 09.01.15 at 11:10, wrote: >> >> >> From: Jan Beulich [mailto:jbeul...@suse.com] >> >> >> The question isn't about migrating with devices assigned, but about >> >> >> assigning devices after migration (consider a dual vif + SR-IOV NIC >> >> >> guest setup where the SR-IOV NIC gets hot-removed before >> >> >> migration and a new one hot-plugged afterwards). >> >> >> >> >> >> Furthermore any tying of the guest memory layout to the host's >> >> >> where the guest first boots is awkward, as post-migration there's >> >> >> not going to be any reliable correlation between the guest layout >> >> >> and the new host's. >> >> > >> >> > how can you solve this? like above example, a NIC on node-A leaves >> >> > a reserved region in guest e820. now it's hot-removed and then >> >> > migrated to node-b. there's no way to update e820 again since it's >> >> > only boot structure. then user will still see such awkward regions. >> >> > since it's not avoidable, report-all in the summary mail looks not >> >> > causing a new problem. >> >> >> >> The solution to this are reserved regions specified in the guest config, >> >> independent of host characteristics. >> > >> > I don't think how reserved regions are specified matter here. My point >> > is that when a region is reserved in e820 at boot time, there's no way >> > to erase that knowledge in the guest even when devices causing that >> > reservation are hot removed later. >> >> I don't think anyone ever indicated that such erasure would be >> needed/wanted - I'm not sure how you ended up there. >> > > I ended here to indicate that report-all which gives user more reserved > regions than necessary is not a weird case since above scenario can also > create such fact. User shouldn't set expectation about reserved region > layout. and this argument is necessary to support our proposal of using > report-all. :-) The fact that ranges can't be removed from a guest's memory map is irrelevant - there's simply no question that this is that way. The main counter argument against report-all remains: It may result in unnecessarily little low memory in guests not needing all of the host regions to be reserved for them. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Monday, January 12, 2015 5:51 PM > > >>> On 12.01.15 at 10:41, wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Sent: Monday, January 12, 2015 5:33 PM > >> >>> On 12.01.15 at 09:46, wrote: > >> >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> >> Sent: Friday, January 09, 2015 6:35 PM > >> >> >>> On 09.01.15 at 11:10, wrote: > >> >> >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> >> >> The question isn't about migrating with devices assigned, but about > >> >> >> assigning devices after migration (consider a dual vif + SR-IOV NIC > >> >> >> guest setup where the SR-IOV NIC gets hot-removed before > >> >> >> migration and a new one hot-plugged afterwards). > >> >> >> > >> >> >> Furthermore any tying of the guest memory layout to the host's > >> >> >> where the guest first boots is awkward, as post-migration there's > >> >> >> not going to be any reliable correlation between the guest layout > >> >> >> and the new host's. > >> >> > > >> >> > how can you solve this? like above example, a NIC on node-A leaves > >> >> > a reserved region in guest e820. now it's hot-removed and then > >> >> > migrated to node-b. there's no way to update e820 again since it's > >> >> > only boot structure. then user will still see such awkward regions. > >> >> > since it's not avoidable, report-all in the summary mail looks not > >> >> > causing a new problem. > >> >> > >> >> The solution to this are reserved regions specified in the guest config, > >> >> independent of host characteristics. > >> > > >> > I don't think how reserved regions are specified matter here. My point > >> > is that when a region is reserved in e820 at boot time, there's no way > >> > to erase that knowledge in the guest even when devices causing that > >> > reservation are hot removed later. > >> > >> I don't think anyone ever indicated that such erasure would be > >> needed/wanted - I'm not sure how you ended up there. > >> > > > > I ended here to indicate that report-all which gives user more reserved > > regions than necessary is not a weird case since above scenario can also > > create such fact. User shouldn't set expectation about reserved region > > layout. and this argument is necessary to support our proposal of using > > report-all. :-) > > The fact that ranges can't be removed from a guest's memory map > is irrelevant - there's simply no question that this is that way. The > main counter argument against report-all remains: It may result in > unnecessarily little low memory in guests not needing all of the host > regions to be reserved for them. > the result is related to another open whether we want to block guest boot for such problem. If 'warn' in domain builder is acceptable, we don't need to change lowmem for such rare 1GB case, just throws a warning for unnecessary conflictions (doesn't hurt if user doesn't assign it). Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m
>>> On 10.01.15 at 00:04, wrote: > On 01/09/2015 02:41 PM, Andrew Cooper wrote: >> Having some non-OS part of the guest swap the EPT tables and >> accidentally turn a DMA buffer read-only is not going to end well. >> > > The agent can certainly do bad things, and at some level you have to assume it > is sensible enough not to. However, I'm not sure this is fundamentally more > dangerous than what a privileged domain can do today using the MEMOP... > operations, and people are already using those for very similar purposes. I don't follow - how is what privileged domain can do related to the proposed changes here (which are - via VMFUNC - at least partially guest controllable, and that's also the case Andrew mentioned in his reply)? I'm having a hard time understanding how a P2M stripped of anything that's not plain RAM can be very useful to a guest. IOW without such fundamental aspects clarified I don't see a point in looking at the individual patches (which btw, according to your wording elsewhere, should have been marked RFC). Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu
> -Original Message- > From: xen-devel-boun...@lists.xen.org [mailto:xen-devel- > boun...@lists.xen.org] On Behalf Of Andrew Cooper > Sent: 12 January 2015 09:47 > To: Xen-devel > Cc: Andrew Cooper; Keir (Xen.org); Jan Beulich > Subject: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() > to take a domain rather than vcpu > > The resource behind an event channel is domain centric rather than vcpu > centric, and free_xen_event_channel() only follows the vcpu's domain > pointer. > > This change allows mem_event_disable() to avoid arbitrarily referencing > d->vcpu[0] just to pass the domain. > > No functional change. > > Signed-off-by: Andrew Cooper > CC: Keir Fraser > CC: Jan Beulich Reviewed-by: Paul Durrant > --- > xen/arch/x86/hvm/hvm.c | 12 ++-- > xen/common/event_channel.c |4 +--- > xen/common/mem_event.c |2 +- > xen/include/xen/event.h|3 +-- > 4 files changed, 9 insertions(+), 12 deletions(-) > > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c > index 8b06bfd..acfc032 100644 > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct > hvm_ioreq_server *s, > return 0; > > fail3: > -free_xen_event_channel(v, sv->ioreq_evtchn); > +free_xen_event_channel(v->domain, sv->ioreq_evtchn); > > fail2: > spin_unlock(&s->lock); > @@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct > hvm_ioreq_server *s, > list_del(&sv->list_entry); > > if ( v->vcpu_id == 0 && s->bufioreq.va != NULL ) > -free_xen_event_channel(v, s->bufioreq_evtchn); > +free_xen_event_channel(v->domain, s->bufioreq_evtchn); > > -free_xen_event_channel(v, sv->ioreq_evtchn); > +free_xen_event_channel(v->domain, sv->ioreq_evtchn); > > xfree(sv); > break; > @@ -701,9 +701,9 @@ static void > hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s) > list_del(&sv->list_entry); > > if ( v->vcpu_id == 0 && s->bufioreq.va != NULL ) > -free_xen_event_channel(v, s->bufioreq_evtchn); > +free_xen_event_channel(v->domain, s->bufioreq_evtchn); > > -free_xen_event_channel(v, sv->ioreq_evtchn); > +free_xen_event_channel(v->domain, sv->ioreq_evtchn); > > xfree(sv); > } > @@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu > *v, domid_t remote_domid, > > /* xchg() ensures that only we call free_xen_event_channel(). */ > old_port = xchg(p_port, new_port); > -free_xen_event_channel(v, old_port); > +free_xen_event_channel(v->domain, old_port); > return 0; > } > > diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c > index 7d6de54..cfe4978 100644 > --- a/xen/common/event_channel.c > +++ b/xen/common/event_channel.c > @@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel( > } > > > -void free_xen_event_channel( > -struct vcpu *local_vcpu, int port) > +void free_xen_event_channel(struct domain *d, int port) > { > struct evtchn *chn; > -struct domain *d = local_vcpu->domain; > > spin_lock(&d->event_lock); > > diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c > index 16ebdb5..0cc43d7 100644 > --- a/xen/common/mem_event.c > +++ b/xen/common/mem_event.c > @@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d, > struct mem_event_domain *med) > } > > /* Free domU's event channel and leave the other one unbound */ > -free_xen_event_channel(d->vcpu[0], med->xen_port); > +free_xen_event_channel(d, med->xen_port); > > /* Unblock all vCPUs */ > for_each_vcpu ( d, v ) > diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h > index 88526f8..0eb1dd3 100644 > --- a/xen/include/xen/event.h > +++ b/xen/include/xen/event.h > @@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)( > int alloc_unbound_xen_event_channel( > struct vcpu *local_vcpu, domid_t remote_domid, > xen_event_channel_notification_t notification_fn); > -void free_xen_event_channel( > -struct vcpu *local_vcpu, int port); > +void free_xen_event_channel(struct domain *d, int port); > > /* Query if event channel is in use by the guest */ > int guest_enabled_event(struct vcpu *v, uint32_t virq); > -- > 1.7.10.4 > > > ___ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote: > Il 12/01/2015 10:31, Ian Campbell ha scritto: > > On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: > >> In the meantime, I saw this: > >> http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html > >> Based on the post above seems that phy will have important risk of data > >> loss if I understand good, from Ian Campbell post: > >>> xl also uses qdisk for raw disk images instead of loop+blkback which > >>> xend used, because there are concerns that the loop driver can lead to > >>> data loss (by not implementing direct i/o the underlying device, and/or > >>> not handling flushes correct, my memory is a bit fuzzy). > > Stefano already corrected me on this in this very thread. > > > > Ian. > > > > Thanks for your reply. > I saw other posts about and if I understand good when O_DIRECT patches > will be in upstream loop driver the data loss risk will be solved, right? Stefano says that O_DIRECT is not needed, only correct barrier semantics are and he believes those are correctly implemented. O_DIRECT would be an additional layer of safety perhaps, but sounds to be not strictly needed. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu
>>> On 12.01.15 at 10:46, wrote: > The resource behind an event channel is domain centric rather than vcpu > centric, and free_xen_event_channel() only follows the vcpu's domain > pointer. I wonder whether for symmetry alloc_unbound_xen_event_channel() shouldn't then also take a [struct domain *, unsigned int vcpu_id] pair instead of a struct vcpu *. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
>>> On 12.01.15 at 10:56, wrote: > the result is related to another open whether we want to block guest > boot for such problem. If 'warn' in domain builder is acceptable, we > don't need to change lowmem for such rare 1GB case, just throws > a warning for unnecessary conflictions (doesn't hurt if user doesn't > assign it). And how would you then deal with the one guest needing that range reserved? Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu
On 12/01/15 10:07, Jan Beulich wrote: On 12.01.15 at 10:46, wrote: >> The resource behind an event channel is domain centric rather than vcpu >> centric, and free_xen_event_channel() only follows the vcpu's domain >> pointer. > I wonder whether for symmetry alloc_unbound_xen_event_channel() > shouldn't then also take a [struct domain *, unsigned int vcpu_id] > pair instead of a struct vcpu *. Sounds like a good idea - I will do that for v2. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel
This is RFC because explicitly changes the logic introduced by c/s b34f2c375 "xsm: label xen-consumer event channels", and is only compile tested. Xen event channels are not internal resources. They still have one end in a domain, and are created at the request of privileged domains. This logic which "successfully" creates a Xen event channel opens up undesirable failure cases with ill-specified XSM policies. If a domain is permitted to create ioreq servers or memevent listeners, but not to create event channels, the ioreq/memevent creation will succeed but attempting to bind the returned event channel will fail without any indication of a permission error. Signed-off-by: Andrew Cooper CC: Daniel De Graaf CC: Keir Fraser CC: Jan Beulich CC: Tim Deegan CC: Ian Campbell CC: Ian Jackson --- xen/common/event_channel.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index cfe4978..89a7d99 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel( chn = evtchn_from_port(d, port); rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid); +if ( rc ) +goto out; chn->state = ECS_UNBOUND; chn->xen_consumer = get_xen_consumer(notification_fn); chn->notify_vcpu_id = local_vcpu->vcpu_id; -chn->u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID; +chn->u.unbound.remote_domid = remote_domid; out: spin_unlock(&d->event_lock); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Monday, January 12, 2015 6:09 PM > > >>> On 12.01.15 at 10:56, wrote: > > the result is related to another open whether we want to block guest > > boot for such problem. If 'warn' in domain builder is acceptable, we > > don't need to change lowmem for such rare 1GB case, just throws > > a warning for unnecessary conflictions (doesn't hurt if user doesn't > > assign it). > > And how would you then deal with the one guest needing that > range reserved? > if guest needs the range, then report-all or report-sel doesn't matter. domain builder throws the warning, and later device assignment will fail (or warn w/ override). In reality I think 1GB is rare. Making such assumption to simplify implementation is reasonable. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
Il 12/01/2015 11:06, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote: Il 12/01/2015 10:31, Ian Campbell ha scritto: On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: In the meantime, I saw this: http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html Based on the post above seems that phy will have important risk of data loss if I understand good, from Ian Campbell post: xl also uses qdisk for raw disk images instead of loop+blkback which xend used, because there are concerns that the loop driver can lead to data loss (by not implementing direct i/o the underlying device, and/or not handling flushes correct, my memory is a bit fuzzy). Stefano already corrected me on this in this very thread. Ian. Thanks for your reply. I saw other posts about and if I understand good when O_DIRECT patches will be in upstream loop driver the data loss risk will be solved, right? Stefano says that O_DIRECT is not needed, only correct barrier semantics are and he believes those are correctly implemented. I not understand if manual changes and/or settings are needed, about this I mean: only correct barrier semantics are and he believes those are correctly implemented If yes what exactly? O_DIRECT would be an additional layer of safety perhaps, but sounds to be not strictly needed. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2
On Mon, 2015-01-12 at 11:17 +0100, Fabio Fantoni wrote: > Il 12/01/2015 11:06, Ian Campbell ha scritto: > > On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote: > >> Il 12/01/2015 10:31, Ian Campbell ha scritto: > >>> On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote: > In the meantime, I saw this: > http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html > Based on the post above seems that phy will have important risk of data > loss if I understand good, from Ian Campbell post: > > xl also uses qdisk for raw disk images instead of loop+blkback which > > xend used, because there are concerns that the loop driver can lead to > > data loss (by not implementing direct i/o the underlying device, and/or > > not handling flushes correct, my memory is a bit fuzzy). > >>> Stefano already corrected me on this in this very thread. > >>> > >>> Ian. > >>> > >> Thanks for your reply. > >> I saw other posts about and if I understand good when O_DIRECT patches > >> will be in upstream loop driver the data loss risk will be solved, right? > > Stefano says that O_DIRECT is not needed, only correct barrier semantics > > are and he believes those are correctly implemented. > > I not understand if manual changes and/or settings are needed, about > this I mean: > > only correct barrier semantics are and he believes those are correctly > > implemented > If yes what exactly? If Stefano is correct then it should all just work, no manual steps needed (if manual steps were needed we wouldn't be taking this patch). Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel
On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote: > This is RFC because explicitly changes the logic introduced by c/s b34f2c375 > "xsm: label xen-consumer event channels", and is only compile tested. > > Xen event channels are not internal resources. They still have one end in a > domain, and are created at the request of privileged domains. This logic > which "successfully" creates a Xen event channel opens up undesirable failure > cases with ill-specified XSM policies. > > If a domain is permitted to create ioreq servers or memevent listeners, but > not to create event channels, the ioreq/memevent creation will succeed but > attempting to bind the returned event channel will fail without any indication > of a permission error. > > Signed-off-by: Andrew Cooper > CC: Daniel De Graaf > CC: Keir Fraser > CC: Jan Beulich > CC: Tim Deegan > CC: Ian Campbell > CC: Ian Jackson > --- > xen/common/event_channel.c |4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c > index cfe4978..89a7d99 100644 > --- a/xen/common/event_channel.c > +++ b/xen/common/event_channel.c > @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel( > chn = evtchn_from_port(d, port); > > rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid); > +if ( rc ) > +goto out; out here appears to return port, not rc so you aren't returning failure, but an even more half setup port than before. And I think you need to free the port on failure too. > > chn->state = ECS_UNBOUND; > chn->xen_consumer = get_xen_consumer(notification_fn); > chn->notify_vcpu_id = local_vcpu->vcpu_id; > -chn->u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID; > +chn->u.unbound.remote_domid = remote_domid; > > out: > spin_unlock(&d->event_lock); ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
>>> On 12.01.15 at 11:12, wrote: >> From: Jan Beulich [mailto:jbeul...@suse.com] >> Sent: Monday, January 12, 2015 6:09 PM >> >> >>> On 12.01.15 at 10:56, wrote: >> > the result is related to another open whether we want to block guest >> > boot for such problem. If 'warn' in domain builder is acceptable, we >> > don't need to change lowmem for such rare 1GB case, just throws >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't >> > assign it). >> >> And how would you then deal with the one guest needing that >> range reserved? > > if guest needs the range, then report-all or report-sel doesn't matter. > domain builder throws the warning, and later device assignment will > fail (or warn w/ override). In reality I think 1GB is rare. Making such > assumption to simplify implementation is reasonable. One of my main problems with all you recent argumentation here is the arbitrary use of the 1Gb boundary - there's nothing special in this discussion with where the boundary is. Everything revolves around the (undue) effect of report-all on domains not needing all of the ranges found on the host. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold
On 2015/01/12 9:44, Jan Beulich wrote: > We've had reports of systems where CMCIs would surface at a relatively > high rate during certain periods of time, without them apparently > causing subsequent more severe problems (see Xeon E7-8800/4800/2800 > specification clarification SC1). Give the admin a knob to lower the > impact on the system logs. > > Signed-off-by: Jan Beulich A small comment at the bottom, besides of that: Acked-by: Christoph Egger > > --- a/docs/misc/xen-command-line.markdown > +++ b/docs/misc/xen-command-line.markdown > @@ -242,6 +242,14 @@ the NMI watchdog is also enabled. > > If set, override Xen's default choice for the platform timer. > > +### cmci-threshold > +> `= ` > + > +> Default: `2` > + > +Specify the event count threshold for raising Corrected Machine Check > +Interrupts. Specifying zero disables CMCI handling. > + > ### cmos-rtc-probe > > `= ` > > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c > +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c > @@ -492,6 +492,9 @@ static int do_cmci_discover(int i) > { > unsigned msr = MSR_IA32_MCx_CTL2(i); > u64 val; > +unsigned int threshold, max_threshold; > +static unsigned int cmci_threshold = 2; > +integer_param("cmci-threshold", cmci_threshold); > > rdmsrl(msr, val); > /* Some other CPU already owns this bank. */ > @@ -500,15 +503,28 @@ static int do_cmci_discover(int i) > goto out; > } > > -val &= ~CMCI_THRESHOLD_MASK; > -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); > -rdmsrl(msr, val); > +if ( cmci_threshold ) > +{ > +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK); > +rdmsrl(msr, val); > +} > > if (!(val & CMCI_EN)) { > /* This bank does not support CMCI. Polling timer has to handle it. > */ > mcabanks_set(i, __get_cpu_var(no_cmci_banks)); > +wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK); > return 0; > } > +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK); > +threshold = cmci_threshold; > +if ( threshold > max_threshold ) > +{ > + mce_printk(MCE_QUIET, > + "CMCI: threshold %#x too large for CPU%u bank %u, using > %#x\n", > + threshold, smp_processor_id(), i, max_threshold); > + threshold = max_threshold; > +} > +wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold); > mcabanks_set(i, __get_cpu_var(mce_banks_owned)); > out: > mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); > --- a/xen/arch/x86/cpu/mcheck/x86_mca.h > +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h > @@ -86,9 +86,6 @@ > /* Bitfield of MSR_K8_HWCR register */ > #define K8_HWCR_MCi_STATUS_WREN (1ULL << 18) > > -/*Intel Specific bitfield*/ > -#define CMCI_THRESHOLD 0x2 > - > #define MCi_MISC_ADDRMOD_MASK (0x7UL << 6) > #define MCi_MISC_PHYSMOD(0x2UL << 6) I think these two are also Intel specific bitfields. Please leave the comment for those. Christoph ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel
On 12/01/15 10:22, Ian Campbell wrote: > On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote: >> This is RFC because explicitly changes the logic introduced by c/s b34f2c375 >> "xsm: label xen-consumer event channels", and is only compile tested. >> >> Xen event channels are not internal resources. They still have one end in a >> domain, and are created at the request of privileged domains. This logic >> which "successfully" creates a Xen event channel opens up undesirable failure >> cases with ill-specified XSM policies. >> >> If a domain is permitted to create ioreq servers or memevent listeners, but >> not to create event channels, the ioreq/memevent creation will succeed but >> attempting to bind the returned event channel will fail without any >> indication >> of a permission error. >> >> Signed-off-by: Andrew Cooper >> CC: Daniel De Graaf >> CC: Keir Fraser >> CC: Jan Beulich >> CC: Tim Deegan >> CC: Ian Campbell >> CC: Ian Jackson >> --- >> xen/common/event_channel.c |4 +++- >> 1 file changed, 3 insertions(+), 1 deletion(-) >> >> diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c >> index cfe4978..89a7d99 100644 >> --- a/xen/common/event_channel.c >> +++ b/xen/common/event_channel.c >> @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel( >> chn = evtchn_from_port(d, port); >> >> rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid); >> +if ( rc ) >> +goto out; > out here appears to return port, not rc so you aren't returning failure, > but an even more half setup port than before. Ah yes - so I am. rc was intended. All callers do correctly check for < 0 to indicate failure. > > And I think you need to free the port on failure too. At the point that we bail, chn->state is still ECS_FREE so there is nothing to deallocate. ~Andrew > >> >> chn->state = ECS_UNBOUND; >> chn->xen_consumer = get_xen_consumer(notification_fn); >> chn->notify_vcpu_id = local_vcpu->vcpu_id; >> -chn->u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID; >> +chn->u.unbound.remote_domid = remote_domid; >> >> out: >> spin_unlock(&d->event_lock); > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2] x86/HVM: make hvm_efer_valid() honor guest features
On 12/01/15 08:00, Jan Beulich wrote: > Following the earlier similar change validating CR4 modifications. > > Signed-off-by: Jan Beulich Acked-by: Andrew Cooper > --- > v2: consider CR0.PG during restore when checking EFER.LMA > > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma > return 0; > } > > -static bool_t hvm_efer_valid(struct domain *d, > - uint64_t value, uint64_t efer_validbits) > +static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value, > + signed int cr0_pg) > { > -if ( nestedhvm_enabled(d) && cpu_has_svm ) > -efer_validbits |= EFER_SVME; > +unsigned int ext1_ecx = 0, ext1_edx = 0; > > -return !((value & ~efer_validbits) || > - ((sizeof(long) != 8) && (value & EFER_LME)) || > - (!cpu_has_svm && (value & EFER_SVME)) || > - (!cpu_has_nx && (value & EFER_NX)) || > - (!cpu_has_syscall && (value & EFER_SCE)) || > - (!cpu_has_lmsl && (value & EFER_LMSLE)) || > - (!cpu_has_ffxsr && (value & EFER_FFXSE)) || > - ((value & (EFER_LME|EFER_LMA)) == EFER_LMA)); > +if ( cr0_pg < 0 && !is_hardware_domain(v->domain) ) > +{ > +unsigned int level; > + > +ASSERT(v == current); > +hvm_cpuid(0x8000, &level, NULL, NULL, NULL); > +if ( level >= 0x8001 ) > +hvm_cpuid(0x8001, NULL, NULL, &ext1_ecx, &ext1_edx); > +} > +else > +{ > +ext1_edx = boot_cpu_data.x86_capability[X86_FEATURE_LM / 32]; > +ext1_ecx = boot_cpu_data.x86_capability[X86_FEATURE_SVM / 32]; > +} > + > +if ( (value & EFER_SCE) && > + !(ext1_edx & cpufeat_mask(X86_FEATURE_SYSCALL)) ) > +return 0; > + > +if ( (value & (EFER_LME | EFER_LMA)) && > + !(ext1_edx & cpufeat_mask(X86_FEATURE_LM)) ) > +return 0; > + > +if ( cr0_pg > 0 && (value & EFER_LMA) && (!(value & EFER_LME) || > !cr0_pg) ) > +return 0; > + > +if ( (value & EFER_NX) && !(ext1_edx & cpufeat_mask(X86_FEATURE_NX)) ) > +return 0; > + > +if ( (value & EFER_SVME) && > + (!(ext1_ecx & cpufeat_mask(X86_FEATURE_SVM)) || > + !nestedhvm_enabled(v->domain)) ) > +return 0; > + > +if ( (value & EFER_LMSLE) && !cpu_has_lmsl ) > +return 0; > + > +if ( (value & EFER_FFXSE) && > + !(ext1_edx & cpufeat_mask(X86_FEATURE_FFXSR)) ) > +return 0; > + > +return 1; > } > > /* These reserved bits in lower 32 remain 0 after any load of CR0 */ > @@ -1763,7 +1796,6 @@ static int hvm_load_cpu_ctxt(struct doma > struct vcpu *v; > struct hvm_hw_cpu ctxt; > struct segment_register seg; > -uint64_t efer_validbits; > > /* Which vcpu is this? */ > vcpuid = hvm_load_instance(h); > @@ -1794,9 +1826,7 @@ static int hvm_load_cpu_ctxt(struct doma > return -EINVAL; > } > > -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_LMA > - | EFER_NX | EFER_SCE; > -if ( !hvm_efer_valid(d, ctxt.msr_efer, efer_validbits) ) > +if ( !hvm_efer_valid(v, ctxt.msr_efer, MASK_EXTR(ctxt.cr0, X86_CR0_PG)) ) > { > printk(XENLOG_G_ERR "HVM%d restore: bad EFER %#" PRIx64 "\n", > d->domain_id, ctxt.msr_efer); > @@ -2936,12 +2966,10 @@ err: > int hvm_set_efer(uint64_t value) > { > struct vcpu *v = current; > -uint64_t efer_validbits; > > value &= ~EFER_LMA; > > -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_NX | EFER_SCE; > -if ( !hvm_efer_valid(v->domain, value, efer_validbits) ) > +if ( !hvm_efer_valid(v, value, -1) ) > { > gdprintk(XENLOG_WARNING, "Trying to set reserved bit in " > "EFER: %#"PRIx64"\n", value); > > > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3] x86: also allow REP STOS emulation acceleration
On 12/01/15 08:01, Jan Beulich wrote: > While the REP MOVS acceleration appears to have helped qemu-traditional > based guests, qemu-upstream (or really the respective video BIOSes) > doesn't appear to benefit from that. Instead the acceleration added > here provides a visible performance improvement during very early HVM > guest boot. > > Signed-off-by: Jan Beulich Acked-by: Andrew Cooper > --- > v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos() > Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew. > v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by > Andrew. Add output operand telling the compiler that "buf" is being > written. > > --- a/xen/arch/x86/hvm/emulate.c > +++ b/xen/arch/x86/hvm/emulate.c > @@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard( > return X86EMUL_OKAY; > } > > +static int hvmemul_rep_stos_discard( > +void *p_data, > +enum x86_segment seg, > +unsigned long offset, > +unsigned int bytes_per_rep, > +unsigned long *reps, > +struct x86_emulate_ctxt *ctxt) > +{ > +return X86EMUL_OKAY; > +} > + > static int hvmemul_rep_outs_discard( > enum x86_segment src_seg, > unsigned long src_offset, > @@ -982,6 +993,113 @@ static int hvmemul_rep_movs( > return X86EMUL_OKAY; > } > > +static int hvmemul_rep_stos( > +void *p_data, > +enum x86_segment seg, > +unsigned long offset, > +unsigned int bytes_per_rep, > +unsigned long *reps, > +struct x86_emulate_ctxt *ctxt) > +{ > +struct hvm_emulate_ctxt *hvmemul_ctxt = > +container_of(ctxt, struct hvm_emulate_ctxt, ctxt); > +unsigned long addr; > +paddr_t gpa; > +p2m_type_t p2mt; > +bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF); > +int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps, > + hvm_access_write, hvmemul_ctxt, > &addr); > + > +if ( rc == X86EMUL_OKAY ) > +{ > +uint32_t pfec = PFEC_page_present | PFEC_write_access; > + > +if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 ) > +pfec |= PFEC_user_mode; > + > +rc = hvmemul_linear_to_phys( > +addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt); > +} > +if ( rc != X86EMUL_OKAY ) > +return rc; > + > +/* Check for MMIO op */ > +(void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt); > + > +switch ( p2mt ) > +{ > +unsigned long bytes; > +void *buf; > + > +default: > +/* Allocate temporary buffer. */ > +for ( ; ; ) > +{ > +bytes = *reps * bytes_per_rep; > +buf = xmalloc_bytes(bytes); > +if ( buf || *reps <= 1 ) > +break; > +*reps >>= 1; > +} > + > +if ( !buf ) > +buf = p_data; > +else > +switch ( bytes_per_rep ) > +{ > +unsigned long dummy; > + > +#define CASE(bits, suffix) \ > +case (bits) / 8: \ > +asm ( "rep stos" #suffix \ > + : "=m" (*(char (*)[bytes])buf), \ > +"=D" (dummy), "=c" (dummy) \ > + : "a" (*(const uint##bits##_t *)p_data), \ > + "1" (buf), "2" (*reps) ); \ > +break > +CASE(8, b); > +CASE(16, w); > +CASE(32, l); > +CASE(64, q); > +#undef CASE > + > +default: > +ASSERT_UNREACHABLE(); > +xfree(buf); > +return X86EMUL_UNHANDLEABLE; > +} > + > +/* Adjust address for reverse store. */ > +if ( df ) > +gpa -= bytes - bytes_per_rep; > + > +rc = hvm_copy_to_guest_phys(gpa, buf, bytes); > + > +if ( buf != p_data ) > +xfree(buf); > + > +switch ( rc ) > +{ > +case HVMCOPY_gfn_paged_out: > +case HVMCOPY_gfn_shared: > +return X86EMUL_RETRY; > +case HVMCOPY_okay: > +return X86EMUL_OKAY; > +} > + > +gdprintk(XENLOG_WARNING, > + "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu > bytes_per_rep=%u\n", > + gpa, *reps, bytes_per_rep); > +/* fall through */ > +case p2m_mmio_direct: > +return X86EMUL_UNHANDLEABLE; > + > +case p2m_mmio_dm: > +return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df, > + p_data); > +} > +} > + > static int hvmemul_read_segment( > enum x86_segment seg, > struct segment_register *reg, > @@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_ > .rep_ins = hvmemul_rep_ins, > .rep_outs = hvmemul_rep_outs, > .
Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications
On 12/01/15 08:57, Jan Beulich wrote: > The trivial wrapper evtchn_set_pending() is pretty pointless, as it > only serves to invoke another wrapper evtchn_port_set_pending(). In > turn, the latter is kind of inconsistent with its siblings in that is > takes a struct vcpu * rather than a struct domain * - adjusting this > allows for more efficient code in the majority of cases. Reviewed-by: David Vrabel David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation
On 12/01/15 08:23, Jan Beulich wrote: > While for us it's not as bad as it was for Linux, their commit > 13e457e0ee ("KVM: x86: Emulator does not decode clflush well", by > Nadav Amit ) nevertheless points out two > shortcomings in our code: opcode 0F AE /7 is clflush only when it uses > a memory mode (otherwise it's SFENCE) and when there's no REP prefix > (an operand size prefix is fine, as that's CLFLUSHOPT). > > Signed-off-by: Jan Beulich Acked-by: Andrew Cooper > > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -4400,7 +4400,9 @@ x86_emulate( > case 0xae: /* Grp15 */ > switch ( modrm_reg & 7 ) > { > -case 7: /* clflush */ > +case 7: /* clflush{,opt} */ > +fail_if(modrm_mod == 3); > +fail_if(rep_prefix()); > fail_if(ops->wbinvd == NULL); > if ( (rc = ops->wbinvd(ctxt)) != 0 ) > goto done; > > > > > > ___ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] common/memory: fix an XSM error path
On 12/01/15 08:21, Jan Beulich wrote: > XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap > return the extent at which failure was detected, not error indicators. > > Signed-off-by: Jan Beulich Reviewed-by: Andrew Cooper > > --- a/xen/common/memory.c > +++ b/xen/common/memory.c > @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN > return start_extent; > args.domain = d; > > -rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d); > -if ( rc ) > +if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) ) > { > rcu_unlock_domain(d); > -return rc; > +return start_extent; > } > > switch ( op ) > > > > > > ___ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [Patch V2 0/4] xen: correct several bugs in new p2m list setup
On 12/01/15 05:05, Juergen Gross wrote: > In the setup code of the linear mapped p2m list several bugs have > been found, especially for 32 bit dom0. These patches correct the > errors and make 32 bit dom0 bootable again. Applied to stable/for-linus-3.19, thanks. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH net] drivers: net: xen-netfront: remove residual dead code
On 10/01/15 09:20, Vincenzo Maffione wrote: > This patch removes some unused arrays from the netfront private > data structures. These arrays were used in "flip" receive mode. Reviewed-by: David Vrabel Thanks. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] One question about the hypercall to translate gfn to mfn.
On 09/01/15 08:02, Tian, Kevin wrote: >> From: Tim Deegan [mailto:t...@xen.org] >> Sent: Thursday, January 08, 2015 8:43 PM >> >> Hi, >> Not really. The IOMMU tables are also 64-bit so there must be enough addresses to map all of RAM. There shouldn't be any need for these mappings to be _contiguous_, btw. You just need to have one free address for each mapping. Again, following how grant maps work, I'd imagine that PVH guests will allocate an unused GFN for each mapping and do enough bookkeeping to make sure they don't clash with other GFN users (grant mapping, ballooning, &c). PV guests will probably be given a BFN by the hypervisor at map time (which will be == MFN in practice) and just needs to pass the same BFN to the unmap call later (it can store it in the GTT meanwhile). >>> >>> if possible prefer to make both consistent, i.e. always finding unused GFN? >> >> I don't think it will be possible. PV domains are already using BFNs >> supplied by Xen (in fact == MFN) for backend grant mappings, which >> would conflict with supplying their own for these mappings. But >> again, I think the kernel maintainers for Xen may have a better idea >> of how these interfaces are used inside the kernel. For example, >> it might be easy enough to wrap the two systems inside a common API >> inside linux. Again, following how grant mapping works seems like >> the way forward. >> > > So Konrad, do you have any insight here? :-) Malcolm took two pages of this notebook explaining to me how he thought it should work (in combination with his PV IOMMU work), so I'll let him explain. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH for 2.3 v2 1/1] xen-hvm: increase maxmem before calling xc_domain_populate_physmap
On Wed, 3 Dec 2014, Don Slutz wrote: > From: Stefano Stabellini > > Increase maxmem before calling xc_domain_populate_physmap_exact to > avoid the risk of running out of guest memory. This way we can also > avoid complex memory calculations in libxl at domain construction > time. > > This patch fixes an abort() when assigning more than 4 NICs to a VM. > > Signed-off-by: Stefano Stabellini > Signed-off-by: Don Slutz > --- > v2: Changes by Don Slutz > Switch from xc_domain_getinfo to xc_domain_getinfolist > Fix error check for xc_domain_getinfolist > Limit increase of maxmem to only do when needed: > Add QEMU_SPARE_PAGES (How many pages to leave free) > Add free_pages calculation > > xen-hvm.c | 19 +++ > 1 file changed, 19 insertions(+) > > diff --git a/xen-hvm.c b/xen-hvm.c > index 7548794..d30e77e 100644 > --- a/xen-hvm.c > +++ b/xen-hvm.c > @@ -90,6 +90,7 @@ static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t > *shared_page, int vcpu) > #endif > > #define BUFFER_IO_MAX_DELAY 100 > +#define QEMU_SPARE_PAGES 16 We need a big comment here to explain why we have this parameter and when we'll be able to get rid of it. Other than that the patch is fine. Thanks! > typedef struct XenPhysmap { > hwaddr start_addr; > @@ -244,6 +245,8 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, > MemoryRegion *mr) > unsigned long nr_pfn; > xen_pfn_t *pfn_list; > int i; > +xc_domaininfo_t info; > +unsigned long free_pages; > > if (runstate_check(RUN_STATE_INMIGRATE)) { > /* RAM already populated in Xen */ > @@ -266,6 +269,22 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, > MemoryRegion *mr) > pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; > } > > +if ((xc_domain_getinfolist(xen_xc, xen_domid, 1, &info) != 1) || > +(info.domain != xen_domid)) { > +hw_error("xc_domain_getinfolist failed"); > +} > +free_pages = info.max_pages - info.tot_pages; > +if (free_pages > QEMU_SPARE_PAGES) { > +free_pages -= QEMU_SPARE_PAGES; > +} else { > +free_pages = 0; > +} > +if ((free_pages < nr_pfn) && > +(xc_domain_setmaxmem(xen_xc, xen_domid, > + ((info.max_pages + nr_pfn - free_pages) > + << (XC_PAGE_SHIFT - 10))) < 0)) { > +hw_error("xc_domain_setmaxmem failed"); > +} > if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, > pfn_list)) { > hw_error("xen: failed to populate ram at " RAM_ADDR_FMT, ram_addr); > } > -- > 1.8.4 > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Monday, January 12, 2015 6:23 PM > > >>> On 12.01.15 at 11:12, wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Sent: Monday, January 12, 2015 6:09 PM > >> > >> >>> On 12.01.15 at 10:56, wrote: > >> > the result is related to another open whether we want to block guest > >> > boot for such problem. If 'warn' in domain builder is acceptable, we > >> > don't need to change lowmem for such rare 1GB case, just throws > >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't > >> > assign it). > >> > >> And how would you then deal with the one guest needing that > >> range reserved? > > > > if guest needs the range, then report-all or report-sel doesn't matter. > > domain builder throws the warning, and later device assignment will > > fail (or warn w/ override). In reality I think 1GB is rare. Making such > > assumption to simplify implementation is reasonable. > > One of my main problems with all you recent argumentation here > is the arbitrary use of the 1Gb boundary - there's nothing special > in this discussion with where the boundary is. Everything revolves > around the (undue) effect of report-all on domains not needing all > of the ranges found on the host. > I'm not sure which part of my argument is not clear here. report-all would be a problem here only if we want to fix all the conflictions (then pulling unnecessary devices increases the confliction possibility) in the domain builder. but if we only fix reasonable ones (e.g. >3GB) while warn other conflictions (e.g. <3G) in domain builder (let later assignment path to actually fail if confliction does matter), then we don't need to solve all conflictions in domain builder (if say 1G example fixing it may instead reduce lowmem greatly) and then report-all may just add more warnings than report-sel for unused devices. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin wrote: >> From: George Dunlap >> Sent: Thursday, January 08, 2015 8:55 PM >> >> On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap >> wrote: >> > If RMRRs almost always happen up above 2G, for example, then a simple >> > solution that wouldn't require too much work would be to make sure >> > that the PCI MMIO hole we specify to libxc and to qemu-upstream is big >> > enough to include all RMRRs. That would satisfy the libxc and qemu >> > requirements. >> > >> > If we then store specific RMRRs we want included in xenstore, >> > hvmloader can put them in the e820 map, and that would satisfy the >> > hvmloader requirement. >> >> An alternate thing to do here would be to "properly" fix the >> qemu-upstream problem, by making a way for hvmloader to communicate >> changes in the gpfn layout to qemu. >> >> Then hvmloader could do the work of moving memory under RMRRs to >> higher memory; and libxc wouldn't need to be involved at all. >> >> I think it would also fix our long-standing issues with assigning PCI >> devices to qemu-upstream guests, which up until now have only been >> worked around. >> > > could you elaborate a bit for that long-standing issue? So qemu-traditional didn't particularly expect to know the guest memory layout. qemu-upstream does; it expects to know what areas of memory are guest memory and what areas of memory are unmapped. If a read or write happens to a gpfn which *xen* knows is valid, but which *qemu-upstream* thinks is unmapped, then qemu-upstream will crash. The problem though is that the guest's memory map is not actually communicated to qemu-upstream in any way. Originally, qemu-upstream was only told how much memory the guest had, and it just "happens" to choose the same guest memory layout as the libxc domain builder does. This works, but it is bad design, because if libxc were to change for some reason, people would have to simply remember to also change the qemu-upstream layout. Where this really bites us is in PCI pass-through. The default <4G MMIO hole is very small; and hvmloader naturally expects to be able to make this area larger by relocating memory from below 4G to above 4G. It moves the memory in Xen's p2m, but it has no way of communicating this to qemu-upstream. So when the guest does an MMIO instuction that causes qemu-upstream to access that memory, the guest crashes. There are two work-arounds at the moment: 1. A flag which tells hvmloader not to relocate memory 2. The option to tell qemu-upstream to make the memory hole larger. Both are just work-arounds though; a "proper fix" would be to allow hvmloader some way of telling qemu that the memory has moved, so it can update its memory map. This will (I'm pretty sure) have an effect on RMRR regions as well, for the reasons I've mentioned above: whether make the "holes" for the RMRRs in libxc or in hvmloader, if we *move* that memory up to the top of the address space (rather than, say, just not giving that RAM to the guest), then qemu-upstream's idea of the guest memory map will be wrong, and will probably crash at some point. Having the ability for hvmloader to populate and/or move the memory around, and then tell qemu-upstream what the resulting map looked like, would fix both the MMIO-resize issue and the RMRR problem, wrt qemu-upstream. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH RFC] xen-time: decreasing the rating of the xen clocksource below that of the tsc clocksource for dom0's
On 08/01/15 15:06, Imre Palik wrote: > On 01/07/15 17:30, Ian Campbell wrote: >> On Wed, 2015-01-07 at 17:16 +0100, Imre Palik wrote: >>> From: "Palik, Imre" >>> >>> In Dom0's the use of the TSC clocksource (whenever it is stable enough to >>> be used) instead of the Xen clocksource should not cause any issues, as >>> Dom0 VMs never live-migrated. >> >> Is this still true given that dom0's vcpus are migrated amongst pcpus on >> the host? The tsc are not synchronised on some generations of hardware >> so the result there would be the TSC appearing to do very odd things >> under dom0's feet. Does Linux cope with that or does it not matter for >> some other reason? > > First of all, if the initial pcpus are not synchronised, linux won't consider > TSC as a viable clocksource. > > If the initial pcpus are synchronised, but then the dom0 vcpus are migrated to > unsynchronised pcpus, then hopefully the tsc watchdog catches the issue, and > the kernel falls back to the xen clocksource. The issue here is that the > watchdog can only detect clock skews bigger than 0.0625s/0.5s. Hopefully this > is enough to avoid the weirdest things. I don't think any such hardware exists. Either TSC is synchronized across all CPUs or none. > Also, some parts of the kernel (e.g., scheduling) will always use the paravirt > clock. No matter what priorities are set. > > So it should be safe for some definition of safe. > But I was unable to test it on a hardware platform that would hit the > problematic > case described above. Ok. Can you list the various time sources and their ratings in the commit message for clarity. i.e, to justify 275 (below TSC = 300. above hpet = 250). David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications
On 12/01/15 08:57, Jan Beulich wrote: > --- a/xen/include/xen/event.h > +++ b/xen/include/xen/event.h > @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru > d->evtchn_port_ops->init(d, evtchn); > } > > -static inline void evtchn_port_set_pending(struct vcpu *v, > +static inline void evtchn_port_set_pending(struct domain *d, > + unsigned int vcpu_id, > struct evtchn *evtchn) I would rename this to the, now vacant, evtchn_set_pending(). It takes an evtchn* not a port. (Its sole caller was evtchn_set_pending(), so the patch won't grow) Furthermore, all callers except send_guest_vcpu_virq() currently use evtchn->notify_vcpu_id to get a struct vcpu* to pass. I think you can drop the vcpu_id parameter and use evtchn->notify_vcpu_id directly, which reduces the likelyhood of a bug where the evtchn is bound to one vcpu but a caller gets the wrong id and raises the event channel on the wrong vcpu. ~Andrew > { > -v->domain->evtchn_port_ops->set_pending(v, evtchn); > +d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn); > } > > static inline void evtchn_port_clear_pending(struct domain *d, > > > > > ___ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
On Wed, 24 Dec 2014, Liang Li wrote: > Use the 'xl pci-attach $DomU $BDF' command to attach more then > one PCI devices to the guest, then detach the devices with > 'xl pci-detach $DomU $BDF', after that, re-attach these PCI > devices again, an error message will be reported like following: > > libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive > an error message from QMP server: Duplicate ID 'pci-pt-03_10.1' > for device. > > The count of calling xen_pt_region_add and xen_pt_region_del are > not the same will cause the XenPCIPassthroughState and it's related > QemuOpts object not be released properly. Thanks for the patch! >From this description, I don't quite understand why the memory_region_ref and memory_region_unref calls are wrong. What do you mean by "The count of calling xen_pt_region_add and xen_pt_region_del are not the same"? On unplug xen_pt_region_del does not get called? Or the memory region argument is not exactly the same as the one initially passed to xen_pt_region_add? > Signed-off-by: Liang Li > Reported-by: Longtao Pang > --- > hw/xen/xen_pt.c | 4 > 1 file changed, 4 deletions(-) > > diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c > index c1bf357..523b8a2 100644 > --- a/hw/xen/xen_pt.c > +++ b/hw/xen/xen_pt.c > @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l, > MemoryRegionSection *sec) > XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, > memory_listener); > > -memory_region_ref(sec->mr); > xen_pt_region_update(s, sec, true); > } > > @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l, > MemoryRegionSection *sec) > memory_listener); > > xen_pt_region_update(s, sec, false); > -memory_region_unref(sec->mr); > } > > static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec) > @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener *l, > MemoryRegionSection *sec) > XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, > io_listener); > > -memory_region_ref(sec->mr); > xen_pt_region_update(s, sec, true); > } > > @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l, > MemoryRegionSection *sec) > io_listener); > > xen_pt_region_update(s, sec, false); > -memory_region_unref(sec->mr); > } > > static const MemoryListener xen_pt_memory_listener = { > -- > 1.9.1 > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c
El 12/01/15 a les 8.09, Bob Liu ha escrit: > > On 01/09/2015 11:51 PM, Roger Pau Monné wrote: >> El 06/01/15 a les 14.19, Bob Liu ha escrit: >>> When there is no enough free grants, gnttab_alloc_grant_references() >>> will fail and block request queue will stop. >>> If the system is always lack of grants, blkif_restart_queue_callback() >>> can't be >>> scheduled and block request queue can't be restart(block I/O hang). >>> >>> But when there are former requests complete, some grants may free to >>> persistent_gnts_c, we can give the request queue another chance to restart >>> and >>> avoid block hang. >>> >>> Reported-by: Junxiao Bi >>> Signed-off-by: Bob Liu >>> --- >>> drivers/block/xen-blkfront.c | 11 +++ >>> 1 file changed, 11 insertions(+) >>> >>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c >>> index 2236c6f..dd30f99 100644 >>> --- a/drivers/block/xen-blkfront.c >>> +++ b/drivers/block/xen-blkfront.c >>> @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, >>> struct blkfront_info *info, >>> } >>> } >>> } >>> + >>> + /* >>> +* Request queue would be stopped if failed to alloc enough grants and >>> +* won't be restarted until gnttab_free_count >= info->callback->count. >>> +* >>> +* But there is another case, once we have enough persistent grants we >>> +* can try to restart the request queue instead of continue to wait for >>> +* 'gnttab_free_count'. >>> +*/ >>> + if (info->persistent_gnts_c >= info->callback.count) >>> + schedule_work(&info->work); >> >> I guess I'm missing something here, but blkif_completion is called by >> blkif_interrupt, which in turn calls kick_pending_request_queues when >> finished, which IMHO should be enough to restart the processing of requests. >> > > You are right, sorry for the mistake. > > The problem we met was a xenblock I/O hang. > Dumped data showed at that time info->persistent_gnt_c = 8, max_gref = 8 > but block request queue was still stopped. > It's very hard to reproduce this issue, we only see it once. > > I think there might be a race condition: > > request A request B: > >info->persistent_gnts_c < max_grefs >and fail to alloc enough grants > > > > interrupt happen, blkif_complte(): > info->persistent_gnts_c++ > kick_pending_request_queues() > > stop block request queue > added to callback list > > If the system don't have enough grants(but have enough persistent_gnts), > request queue would still hang. Not sure how can this happen, blkif_queue_request explicitly checks that persistent_gnts_c < max_grefs before adding the callback and stopping the request queue, so in your case the queue should not be blocked. Can you dump the state of info->connected? Roger. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] Xen 4.5 Development Update (GA slip by a week), GA slip + 1 day
On 9 Jan 2015, at 18:30, Konrad Rzeszutek Wilk wrote: > On Fri, Jan 09, 2015 at 01:51:56PM +, Lars Kurth wrote: >> Note that I published the Acknowledgements and most 4.5 documentation is now >> in place. Bits which you may want to check and fix >> * Spelling of your name if it contains special characters as the scripts >> that I use nuke them - >> http://wiki.xenproject.org/wiki/Xen_Project_4.5_Acknowledgements >> * Some additions to >> http://wiki.xenproject.org/wiki/Xen_Project_Release_Features, which I will do >> >> And we do need to populate the Known Issues section of >> http://wiki.xenproject.org/wiki/Xen_Project_4.5_Release_Notes > > I've added the one that I am aware (systemd). > > In 4.4 we also had this > http://wiki.xenproject.org/wiki?title=Xen_Project_4.4_Feature_List > which was linked of the > http://wiki.xenproject.org/wiki/Xen_Project_4.4_Release_Notes > > I can populate most of htem or we can just reference the blog that will be on > the release day? Konrad, thank you. You don't need to: I created a separate page - see http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List We can just link to ti from the release notes Lars ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
>>> On 12.01.15 at 12:22, wrote: >> From: Jan Beulich [mailto:jbeul...@suse.com] >> Sent: Monday, January 12, 2015 6:23 PM >> One of my main problems with all you recent argumentation here >> is the arbitrary use of the 1Gb boundary - there's nothing special >> in this discussion with where the boundary is. Everything revolves >> around the (undue) effect of report-all on domains not needing all >> of the ranges found on the host. >> > > I'm not sure which part of my argument is not clear here. report-all > would be a problem here only if we want to fix all the conflictions > (then pulling unnecessary devices increases the confliction possibility) > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) > while warn other conflictions (e.g. <3G) in domain builder (let later > assignment path to actually fail if confliction does matter), And have no way for the user to (securely) avoid that failure. Plus the definition of "reasonable" here is of course going to be arbitrary. Jan > then we > don't need to solve all conflictions in domain builder (if say 1G example > fixing it may instead reduce lowmem greatly) and then report-all > may just add more warnings than report-sel for unused devices. > > Thanks > Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Monday, January 12, 2015 7:37 PM > > >>> On 12.01.15 at 12:22, wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Sent: Monday, January 12, 2015 6:23 PM > >> One of my main problems with all you recent argumentation here > >> is the arbitrary use of the 1Gb boundary - there's nothing special > >> in this discussion with where the boundary is. Everything revolves > >> around the (undue) effect of report-all on domains not needing all > >> of the ranges found on the host. > >> > > > > I'm not sure which part of my argument is not clear here. report-all > > would be a problem here only if we want to fix all the conflictions > > (then pulling unnecessary devices increases the confliction possibility) > > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) > > while warn other conflictions (e.g. <3G) in domain builder (let later > > assignment path to actually fail if confliction does matter), > > And have no way for the user to (securely) avoid that failure. Plus > the definition of "reasonable" here is of course going to be arbitrary. > > Jan actually here I didn't get your point then. It's your proposal to make reasonable assumption like below: --- d) Move down the lowmem RAM/MMIO boundary so that a single, contiguous chunk of lowmem results, with all other RAM moving up beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be considered here, and I think we can reasonably safely assume that no RMRRs will ever report ranges above 1Mb but below the host lowmem RAM/MMIO boundary (i.e. we can presumably rest assured that the lowmem chunk will always be reasonably big). --- Thanks Kevin > > > then we > > don't need to solve all conflictions in domain builder (if say 1G example > > fixing it may instead reduce lowmem greatly) and then report-all > > may just add more warnings than report-sel for unused devices. > > > > Thanks > > Kevin > > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications
>>> On 12.01.15 at 12:33, wrote: > On 12/01/15 08:57, Jan Beulich wrote: >> --- a/xen/include/xen/event.h >> +++ b/xen/include/xen/event.h >> @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru >> d->evtchn_port_ops->init(d, evtchn); >> } >> >> -static inline void evtchn_port_set_pending(struct vcpu *v, >> +static inline void evtchn_port_set_pending(struct domain *d, >> + unsigned int vcpu_id, >> struct evtchn *evtchn) > > I would rename this to the, now vacant, evtchn_set_pending(). It takes > an evtchn* not a port. (Its sole caller was evtchn_set_pending(), so > the patch won't grow) No (and I had actually considered it) - that would get its name out of sync with all its sibling wrappers. > Furthermore, all callers except send_guest_vcpu_virq() currently use > evtchn->notify_vcpu_id to get a struct vcpu* to pass. I think you can > drop the vcpu_id parameter and use evtchn->notify_vcpu_id directly, > which reduces the likelyhood of a bug where the evtchn is bound to one > vcpu but a caller gets the wrong id and raises the event channel on the > wrong vcpu. Generally a nice idea, but it doesn't immediately/obviously fit with the use in send_guest_vcpu_virq(). Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
> From: Stefano Stabellini > Sent: Monday, January 12, 2015 7:36 PM > > On Wed, 24 Dec 2014, Liang Li wrote: > > Use the 'xl pci-attach $DomU $BDF' command to attach more then > > one PCI devices to the guest, then detach the devices with > > 'xl pci-detach $DomU $BDF', after that, re-attach these PCI > > devices again, an error message will be reported like following: > > > > libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive > > an error message from QMP server: Duplicate ID 'pci-pt-03_10.1' > > for device. > > > > The count of calling xen_pt_region_add and xen_pt_region_del are > > not the same will cause the XenPCIPassthroughState and it's related > > QemuOpts object not be released properly. > > Thanks for the patch! > > From this description, I don't quite understand why the > memory_region_ref and memory_region_unref calls are wrong. What do > you > mean by "The count of calling xen_pt_region_add and xen_pt_region_del > are not the same"? > > On unplug xen_pt_region_del does not get called? > Or the memory region argument is not exactly the same as the one > initially passed to xen_pt_region_add? > agree. Liang, could you elaborate how the patch is associated with above explanation? :-) > > > Signed-off-by: Liang Li > > Reported-by: Longtao Pang > > --- > > hw/xen/xen_pt.c | 4 > > 1 file changed, 4 deletions(-) > > > > diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c > > index c1bf357..523b8a2 100644 > > --- a/hw/xen/xen_pt.c > > +++ b/hw/xen/xen_pt.c > > @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l, > MemoryRegionSection *sec) > > XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, > > memory_listener); > > > > -memory_region_ref(sec->mr); > > xen_pt_region_update(s, sec, true); > > } > > > > @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l, > MemoryRegionSection *sec) > > memory_listener); > > > > xen_pt_region_update(s, sec, false); > > -memory_region_unref(sec->mr); > > } > > > > static void xen_pt_io_region_add(MemoryListener *l, > MemoryRegionSection *sec) > > @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener > *l, MemoryRegionSection *sec) > > XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, > > io_listener); > > > > -memory_region_ref(sec->mr); > > xen_pt_region_update(s, sec, true); > > } > > > > @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l, > MemoryRegionSection *sec) > > io_listener); > > > > xen_pt_region_update(s, sec, false); > > -memory_region_unref(sec->mr); > > } > > > > static const MemoryListener xen_pt_memory_listener = { > > -- > > 1.9.1 > > > > ___ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
>>> On 12.01.15 at 12:41, wrote: >> From: Jan Beulich [mailto:jbeul...@suse.com] >> Sent: Monday, January 12, 2015 7:37 PM >> >> >>> On 12.01.15 at 12:22, wrote: >> >> From: Jan Beulich [mailto:jbeul...@suse.com] >> >> Sent: Monday, January 12, 2015 6:23 PM >> >> One of my main problems with all you recent argumentation here >> >> is the arbitrary use of the 1Gb boundary - there's nothing special >> >> in this discussion with where the boundary is. Everything revolves >> >> around the (undue) effect of report-all on domains not needing all >> >> of the ranges found on the host. >> >> >> > >> > I'm not sure which part of my argument is not clear here. report-all >> > would be a problem here only if we want to fix all the conflictions >> > (then pulling unnecessary devices increases the confliction possibility) >> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) >> > while warn other conflictions (e.g. <3G) in domain builder (let later >> > assignment path to actually fail if confliction does matter), >> >> And have no way for the user to (securely) avoid that failure. Plus >> the definition of "reasonable" here is of course going to be arbitrary. > > actually here I didn't get your point then. It's your proposal to make > reasonable assumption like below: > > --- > d) Move down the lowmem RAM/MMIO boundary so that a single, > contiguous chunk of lowmem results, with all other RAM moving up > beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be > considered here, and I think we can reasonably safely assume that > no RMRRs will ever report ranges above 1Mb but below the host > lowmem RAM/MMIO boundary (i.e. we can presumably rest assured > that the lowmem chunk will always be reasonably big). Correct - but my point is that this won't work well with your report-all mechanism, only with the report-sel one. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Fri, 2015-01-09 at 15:27 -0500, Konrad Rzeszutek Wilk wrote: > On Thu, Jan 08, 2015 at 06:02:04PM +, George Dunlap wrote: > > On Thu, Jan 8, 2015 at 4:10 PM, Jan Beulich wrote: > > On 08.01.15 at 16:59, wrote: > > >> On Thu, Jan 8, 2015 at 1:54 PM, Jan Beulich wrote: > > the 1st invocation of this interface will save all reported reserved > > regions under domain structure, and later invocation (e.g. from > > hvmloader) gets saved content. > > >>> > > >>> Why would the reserved regions need attaching to the domain > > >>> structure? The combination of (to be) assigned devices and > > >>> global RMRR list always allow reproducing the intended set of > > >>> regions without any extra storage. > > >> > > >> So when you say "(to be) assigned devices", you mean any device which > > >> is currently assigned, *or may be assigned at some point in the > > >> future*? > > > > > > Yes. > > > > > >> Do you think the extra storage for "this VM might possibly be assigned > > >> this device at some point" wouldn't really be that much bigger than > > >> "this VM might possibly map this RMRR at some point in the future"? > > > > > > Since listing devices without RMRR association would be pointless, > > > I think a list of devices would require less storage. But see below. > > > > > >> It seems a lot cleaner to me to have the toolstack tell Xen what > > >> ranges are reserved for RMRR per VM, and then have Xen check again > > >> when assigning a device to make sure that the RMRRs have already been > > >> reserved. > > > > > > With an extra level of what can be got wrong by the admin. > > > However, I now realize that doing it this way would allow > > > specifying regions not associated with any device on the host > > > the guest boots on, but associated with one on a host the guest > > > may later migrate to. > > > > I did say the toolstack, not the admin. :-) > > > > At the xl level, I envisioned a single boolean that would say, "Make > > my memory layout resemble the host system" -- so the MMIO hole would > > be the same size, and all the RMRRs would be reserved. > > Like the e820_host=1 ? :-) I'd been thinking about that all the way down this thread ;-) It seems like a fairly reasonable approach, and the interfaces (e.g. get host memory e820) are mostly already there. But maybe there are HVM specific reasons why its not... Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin wrote: >> From: Jan Beulich [mailto:jbeul...@suse.com] >> Sent: Monday, January 12, 2015 6:23 PM >> >> >>> On 12.01.15 at 11:12, wrote: >> >> From: Jan Beulich [mailto:jbeul...@suse.com] >> >> Sent: Monday, January 12, 2015 6:09 PM >> >> >> >> >>> On 12.01.15 at 10:56, wrote: >> >> > the result is related to another open whether we want to block guest >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we >> >> > don't need to change lowmem for such rare 1GB case, just throws >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't >> >> > assign it). >> >> >> >> And how would you then deal with the one guest needing that >> >> range reserved? >> > >> > if guest needs the range, then report-all or report-sel doesn't matter. >> > domain builder throws the warning, and later device assignment will >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such >> > assumption to simplify implementation is reasonable. >> >> One of my main problems with all you recent argumentation here >> is the arbitrary use of the 1Gb boundary - there's nothing special >> in this discussion with where the boundary is. Everything revolves >> around the (undue) effect of report-all on domains not needing all >> of the ranges found on the host. >> > > I'm not sure which part of my argument is not clear here. report-all > would be a problem here only if we want to fix all the conflictions > (then pulling unnecessary devices increases the confliction possibility) > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) > while warn other conflictions (e.g. <3G) in domain builder (let later > assignment path to actually fail if confliction does matter), then we > don't need to solve all conflictions in domain builder (if say 1G example > fixing it may instead reduce lowmem greatly) and then report-all > may just add more warnings than report-sel for unused devices. You keep saying "report-all" or "report-sel", but I'm not 100% clear what you mean by those. In any case, the naming has got to be a bit misleading: the important questions at the moment, AFAICT, are: 1. Whether we make holes at boot time for all RMRRs on the system, or whether only make RMRRs for some subset (or potentially some other arbitrary range, which may include RMRRs on other hosts to which we may want to migrate). 2. Whether those holes are made by the domain builder in libxc, or by hvmloader 3. What happens if Xen is asked to assign a device and it finds that the required RMRR is not empty: a. during guest creation b. after the guest has booted Obviously at some point some part of the toolstack needs to identify which RMRRs go with what device, so that either libxc or hvmloader can make the appropriate holes in the address space; but at that point, "report" is not so much the right word as "query". (Obviously we want to "report" in the e820 map all RMRRs that we've made holes for in the guest.) -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Jan Beulich [mailto:jbeul...@suse.com] > Sent: Monday, January 12, 2015 8:03 PM > > >>> On 12.01.15 at 12:41, wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Sent: Monday, January 12, 2015 7:37 PM > >> > >> >>> On 12.01.15 at 12:22, wrote: > >> >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> >> Sent: Monday, January 12, 2015 6:23 PM > >> >> One of my main problems with all you recent argumentation here > >> >> is the arbitrary use of the 1Gb boundary - there's nothing special > >> >> in this discussion with where the boundary is. Everything revolves > >> >> around the (undue) effect of report-all on domains not needing all > >> >> of the ranges found on the host. > >> >> > >> > > >> > I'm not sure which part of my argument is not clear here. report-all > >> > would be a problem here only if we want to fix all the conflictions > >> > (then pulling unnecessary devices increases the confliction possibility) > >> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) > >> > while warn other conflictions (e.g. <3G) in domain builder (let later > >> > assignment path to actually fail if confliction does matter), > >> > >> And have no way for the user to (securely) avoid that failure. Plus > >> the definition of "reasonable" here is of course going to be arbitrary. > > > > actually here I didn't get your point then. It's your proposal to make > > reasonable assumption like below: > > > > --- > > d) Move down the lowmem RAM/MMIO boundary so that a single, > > contiguous chunk of lowmem results, with all other RAM moving up > > beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be > > considered here, and I think we can reasonably safely assume that > > no RMRRs will ever report ranges above 1Mb but below the host > > lowmem RAM/MMIO boundary (i.e. we can presumably rest assured > > that the lowmem chunk will always be reasonably big). > > Correct - but my point is that this won't work well with your report-all > mechanism, only with the report-sel one. > I've explained this several times. If there's a violation on above assumption from required devices, same for report-all and report-sel. If the violation is caused by unnecessary devices, please note I'm proposing 'warn' here so report-all at most just adds more warnings in domain builder. the conflict will be caught later if it becomes relevant to be assigned (e.g. thru hotplug). Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m
Ed White writes ("[PATCH 00/11] Alternate p2m: support multiple copies of host p2m"): > This set of patches adds support to hvm domains for EPTP switching > by creating multiple copies of the host p2m (currently limited to 10 > copies). Thanks for this. Did you CC me in my capacity as tools maintainer ? I can't see anything in this series which deals with any necessary tools changes. Are there tools parts to come later ? Thanks, Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output
Ian Campbell writes ("[OSSTEST PATCH] mg-debian-installer-update: produce deterministic output"): > Currently rerunning mg-debian-install-update when the external files > have changed still produces differences in the local files produced ^ Missing "not" ? > during post-processing. > > Avoid these differences by: > > - Using gzip -n, which avoids storing a timestamp in the gzip > header (as well as the name, which we don't need). > - Using pax -M norm, which normalises all timestamps (among other > things, such as the owner, which we don't care about) > - Using tar --mtime, with a reference within the dpkg-deb created > hierarchy (which has timestamps from the package and is therefore > dependent only on the downloaded package revision) > > With this the results of two invocations of > mg-debian-installer-update(-all) are identical (assuming no changes to > the downloaded files) as demonstrated by runnign this quick hack: Excellent. Apart from the missing word in the description, Acked-by: Ian Jackson Thanks, Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, 2015-01-12 at 12:13 +, George Dunlap wrote: > On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Sent: Monday, January 12, 2015 6:23 PM > >> > >> >>> On 12.01.15 at 11:12, wrote: > >> >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> >> Sent: Monday, January 12, 2015 6:09 PM > >> >> > >> >> >>> On 12.01.15 at 10:56, wrote: > >> >> > the result is related to another open whether we want to block guest > >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we > >> >> > don't need to change lowmem for such rare 1GB case, just throws > >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't > >> >> > assign it). > >> >> > >> >> And how would you then deal with the one guest needing that > >> >> range reserved? > >> > > >> > if guest needs the range, then report-all or report-sel doesn't matter. > >> > domain builder throws the warning, and later device assignment will > >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such > >> > assumption to simplify implementation is reasonable. > >> > >> One of my main problems with all you recent argumentation here > >> is the arbitrary use of the 1Gb boundary - there's nothing special > >> in this discussion with where the boundary is. Everything revolves > >> around the (undue) effect of report-all on domains not needing all > >> of the ranges found on the host. > >> > > > > I'm not sure which part of my argument is not clear here. report-all > > would be a problem here only if we want to fix all the conflictions > > (then pulling unnecessary devices increases the confliction possibility) > > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) > > while warn other conflictions (e.g. <3G) in domain builder (let later > > assignment path to actually fail if confliction does matter), then we > > don't need to solve all conflictions in domain builder (if say 1G example > > fixing it may instead reduce lowmem greatly) and then report-all > > may just add more warnings than report-sel for unused devices. > > You keep saying "report-all" or "report-sel", but I'm not 100% clear > what you mean by those. Is the distinction between "all reserved areas" and "only (selectively) those which are related to an RMRR"? That's how I've been reading it... Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output
Ian Campbell writes ("Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output"): > We might like to consider something along these lines for the future: A reasonable idea (although I wonder if it should be configurable). Acked-by: Ian Jackson Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: George Dunlap > Sent: Monday, January 12, 2015 8:14 PM > > On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin wrote: > >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> Sent: Monday, January 12, 2015 6:23 PM > >> > >> >>> On 12.01.15 at 11:12, wrote: > >> >> From: Jan Beulich [mailto:jbeul...@suse.com] > >> >> Sent: Monday, January 12, 2015 6:09 PM > >> >> > >> >> >>> On 12.01.15 at 10:56, wrote: > >> >> > the result is related to another open whether we want to block guest > >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we > >> >> > don't need to change lowmem for such rare 1GB case, just throws > >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't > >> >> > assign it). > >> >> > >> >> And how would you then deal with the one guest needing that > >> >> range reserved? > >> > > >> > if guest needs the range, then report-all or report-sel doesn't matter. > >> > domain builder throws the warning, and later device assignment will > >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such > >> > assumption to simplify implementation is reasonable. > >> > >> One of my main problems with all you recent argumentation here > >> is the arbitrary use of the 1Gb boundary - there's nothing special > >> in this discussion with where the boundary is. Everything revolves > >> around the (undue) effect of report-all on domains not needing all > >> of the ranges found on the host. > >> > > > > I'm not sure which part of my argument is not clear here. report-all > > would be a problem here only if we want to fix all the conflictions > > (then pulling unnecessary devices increases the confliction possibility) > > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) > > while warn other conflictions (e.g. <3G) in domain builder (let later > > assignment path to actually fail if confliction does matter), then we > > don't need to solve all conflictions in domain builder (if say 1G example > > fixing it may instead reduce lowmem greatly) and then report-all > > may just add more warnings than report-sel for unused devices. > > You keep saying "report-all" or "report-sel", but I'm not 100% clear > what you mean by those. In any case, the naming has got to be a bit > misleading: the important questions at the moment, AFAICT, are: I explained them in original proposal > > 1. Whether we make holes at boot time for all RMRRs on the system, or > whether only make RMRRs for some subset (or potentially some other > arbitrary range, which may include RMRRs on other hosts to which we > may want to migrate). I use 'report-all' to stand for making holes for all RMRRs on the system, while 'report-sel' for specified subset. including other RMRRs (from admin for migration) is orthogonal to above open. > > 2. Whether those holes are made by the domain builder in libxc, or by > hvmloader based on current discussion, whether to make holes in hvmloader doesn't bring fundamental difference. as long as domain builder still need to populate memory (even minimal for hvmloader to boot), it needs to check conflict and may ideally make hole too (though we may make assumption not doing that) > > 3. What happens if Xen is asked to assign a device and it finds that > the required RMRR is not empty: > a. during guest creation > b. after the guest has booted for Xen we don't need differentiate a/b. by default it's clear failure should be returned as it implies a security/correctness issue if moving forward. but based on discussion an override to 'warn' only is preferred, so admin can make decision (remains an open on whether to do global override or per-device override) > > Obviously at some point some part of the toolstack needs to identify > which RMRRs go with what device, so that either libxc or hvmloader can > make the appropriate holes in the address space; but at that point, > "report" is not so much the right word as "query". (Obviously we want > to "report" in the e820 map all RMRRs that we've made holes for in the > guest.) yes, using 'report' doesn't catch all the changes we need to make. Just use them to simplify discussion in case all are on the same page. However clearly my original explanation didn't make it. :/ and state my major intention again. I don't think the preparation (i.e. detect confliction and make holes) for device assignment should be a a blocking failure. Throw warning should be enough (i.e. in libxc). We should let actual device assignment path to make final call based on admin's configuration (default 'fail' w/ 'warn' override). Based on that policy I think 'report-all' (making holes for all host RMRRs) is an acceptable approach, w/ small impact on possibly more warning messages (actually not bad to help admin understand the hotplug possibility on this platform) and show more reserved regions to the end user (but he shouldn't make any assumption here). :-) Thanks Kevin
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Tian, Kevin > Sent: Monday, January 12, 2015 8:29 PM > > > From: George Dunlap > > Sent: Monday, January 12, 2015 8:14 PM > > > > On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin > wrote: > > >> From: Jan Beulich [mailto:jbeul...@suse.com] > > >> Sent: Monday, January 12, 2015 6:23 PM > > >> > > >> >>> On 12.01.15 at 11:12, wrote: > > >> >> From: Jan Beulich [mailto:jbeul...@suse.com] > > >> >> Sent: Monday, January 12, 2015 6:09 PM > > >> >> > > >> >> >>> On 12.01.15 at 10:56, wrote: > > >> >> > the result is related to another open whether we want to block guest > > >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we > > >> >> > don't need to change lowmem for such rare 1GB case, just throws > > >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't > > >> >> > assign it). > > >> >> > > >> >> And how would you then deal with the one guest needing that > > >> >> range reserved? > > >> > > > >> > if guest needs the range, then report-all or report-sel doesn't matter. > > >> > domain builder throws the warning, and later device assignment will > > >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such > > >> > assumption to simplify implementation is reasonable. > > >> > > >> One of my main problems with all you recent argumentation here > > >> is the arbitrary use of the 1Gb boundary - there's nothing special > > >> in this discussion with where the boundary is. Everything revolves > > >> around the (undue) effect of report-all on domains not needing all > > >> of the ranges found on the host. > > >> > > > > > > I'm not sure which part of my argument is not clear here. report-all > > > would be a problem here only if we want to fix all the conflictions > > > (then pulling unnecessary devices increases the confliction possibility) > > > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) > > > while warn other conflictions (e.g. <3G) in domain builder (let later > > > assignment path to actually fail if confliction does matter), then we > > > don't need to solve all conflictions in domain builder (if say 1G example > > > fixing it may instead reduce lowmem greatly) and then report-all > > > may just add more warnings than report-sel for unused devices. > > > > You keep saying "report-all" or "report-sel", but I'm not 100% clear > > what you mean by those. In any case, the naming has got to be a bit > > misleading: the important questions at the moment, AFAICT, are: > > I explained them in original proposal > > > > > 1. Whether we make holes at boot time for all RMRRs on the system, or > > whether only make RMRRs for some subset (or potentially some other > > arbitrary range, which may include RMRRs on other hosts to which we > > may want to migrate). > > I use 'report-all' to stand for making holes for all RMRRs on the system, > while 'report-sel' for specified subset. > more accurate... 'report-sel' for making holes for RMRRs belonging to specified devices. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output
On Mon, 2015-01-12 at 12:27 +, Ian Jackson wrote: > Ian Campbell writes ("Re: [Xen-devel] [OSSTEST PATCH] > mg-debian-installer-update: produce deterministic output"): > > We might like to consider something along these lines for the future: > > A reasonable idea (although I wonder if it should be configurable). I did consider just inserting a $CURL_OPTS which could be set to include pragma no-cache from the command line. I mostly didn't because I was too lazy to think about the correct shell quoting at the time... > > Acked-by: Ian Jackson > > Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] xl only waits 33 seconds for ballooning to complete
On Thu, Jan 8, 2015 at 1:11 AM, Mike Latimer wrote: > On Wednesday, January 07, 2015 09:38:31 AM Ian Campbell wrote: >> That's exactly what I was about to suggest as I read the penultimate >> paragraph, i.e. keep waiting so long as some reasonable delta occurs on >> each iteration. > > Thanks, Ian. > > I wonder if there is a future-safe threshold on the amount of delta that > indicates progress is being made. Should some minimum safe progress amount or > percentage be set, or is it better to just make sure free memory is increasing > at the end of each iteration of the loop? > > For example, the following simple change just tracks free_memkb and only > decrements the retry count if it has not increased since the last check: > > -- > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > index ed0d478..4cf2991 100644 > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -2196,7 +2196,7 @@ static int preserve_domain(uint32_t *r_domid, > libxl_event *event, > static int freemem(uint32_t domid, libxl_domain_build_info *b_info) > { > int rc, retries = 3; > -uint32_t need_memkb, free_memkb; > +uint32_t need_memkb, free_memkb, free_memkb_prev = 0; > > if (!autoballoon) > return 0; > @@ -2229,7 +2229,10 @@ static int freemem(uint32_t domid, > libxl_domain_build_info *b_info) > if (rc < 0) > return rc; > > -retries--; > +/* only decrement retry count if free_memkb is not increasing */ > +if (free_memkb <= free_memkb_prev) > +retries--; > +free_memkb_prev = free_memkb; I would: 1. Reset the retries after a successful increase 2. Not allow free_memkb_prev to go down. So maybe something like the following? if (free_memkb <= free_memkb_prev) { retries--; } else { retries = MAX_RETRIES; free_memkb_prev = free_memkb; } I'm inclined to say we could add an option to say "wait forever", or to increase the period of the checks; but ultimately at some point someone (either xl or the human) needs to timeout and say, "This is never going to finish". 10s seems like a very conservative default. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [xen-4.5-testing test] 33303: regressions - FAIL
xen.org writes ("[xen-4.5-testing test] 33303: regressions - FAIL"): > flight 33303 xen-4.5-testing real [real] > http://www.chiark.greenend.org.uk/~xensrcts/logs/33303/ > > Regressions :-( > > Tests which did not succeed and are blocking, > including tests which could not be run: > test-amd64-i386-qemuu-rhel6hvm-intel 7 redhat-installfail REGR. vs. > 33264 osstest is very badly overloaded right now so everything is taking a long time. To see where the release was at I peeked at the results of the currently running test (flight 33348) and it looks like it's going to justify a pass. I also compared its results to 33285 (a not-too-bad xen-unstable flight before we branched) and things look reasonably good there too. So I think we are going to be good to go. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
>>> On 12.01.15 at 13:16, wrote: > I've explained this several times. If there's a violation on above > assumption > from required devices, same for report-all and report-sel. If the violation > is > caused by unnecessary devices, please note I'm proposing 'warn' here so > report-all at most just adds more warnings in domain builder. the conflict > will be caught later if it becomes relevant to be assigned (e.g. thru > hotplug). Since we're apparently not understanding one another, please explain with a suitable example how you envision things to behave. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c
On 12/01/15 11:36, Roger Pau Monné wrote: > El 12/01/15 a les 8.09, Bob Liu ha escrit: >> >> On 01/09/2015 11:51 PM, Roger Pau Monné wrote: >>> El 06/01/15 a les 14.19, Bob Liu ha escrit: When there is no enough free grants, gnttab_alloc_grant_references() will fail and block request queue will stop. If the system is always lack of grants, blkif_restart_queue_callback() can't be scheduled and block request queue can't be restart(block I/O hang). But when there are former requests complete, some grants may free to persistent_gnts_c, we can give the request queue another chance to restart and avoid block hang. Reported-by: Junxiao Bi Signed-off-by: Bob Liu --- drivers/block/xen-blkfront.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2236c6f..dd30f99 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } } } + + /* + * Request queue would be stopped if failed to alloc enough grants and + * won't be restarted until gnttab_free_count >= info->callback->count. + * + * But there is another case, once we have enough persistent grants we + * can try to restart the request queue instead of continue to wait for + * 'gnttab_free_count'. + */ + if (info->persistent_gnts_c >= info->callback.count) + schedule_work(&info->work); >>> >>> I guess I'm missing something here, but blkif_completion is called by >>> blkif_interrupt, which in turn calls kick_pending_request_queues when >>> finished, which IMHO should be enough to restart the processing of requests. >>> >> >> You are right, sorry for the mistake. >> >> The problem we met was a xenblock I/O hang. >> Dumped data showed at that time info->persistent_gnt_c = 8, max_gref = 8 >> but block request queue was still stopped. >> It's very hard to reproduce this issue, we only see it once. >> >> I think there might be a race condition: >> >> request A request B: >> >>info->persistent_gnts_c < max_grefs >>and fail to alloc enough grants >> >> >> >> interrupt happen, blkif_complte(): >> info->persistent_gnts_c++ >> kick_pending_request_queues() >> >> stop block request queue >> added to callback list >> >> If the system don't have enough grants(but have enough persistent_gnts), >> request queue would still hang. > > Not sure how can this happen, blkif_queue_request explicitly checks that > persistent_gnts_c < max_grefs before adding the callback and stopping > the request queue, so in your case the queue should not be blocked. Can > you dump the state of info->connected? I think Bob has correctly identified a race. After calling blk_stop_queue(), check info->persistent_gnts again and restart the queue if there free grefs. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 01/11] VMX: VMFUNC and #VE definitions and detection.
On 09/01/15 21:26, Ed White wrote: > Currently, neither is enabled globally but may be enabled on a per-VCPU > basis by the altp2m code. > > Everything can be force-disabled globally by specifying vmfunc=0 on the > Xen command line. > > Remove the check for EPTE bit 63 == zero in ept_split_super_page(), as > that bit is now hardware-defined. > > Signed-off-by: Ed White > --- > docs/misc/xen-command-line.markdown | 7 +++ > xen/arch/x86/hvm/vmx/vmcs.c | 40 > + > xen/arch/x86/mm/p2m-ept.c | 1 - > xen/include/asm-x86/hvm/vmx/vmcs.h | 16 +++ > xen/include/asm-x86/hvm/vmx/vmx.h | 13 +++- > xen/include/asm-x86/msr-index.h | 1 + > 6 files changed, 76 insertions(+), 2 deletions(-) > > diff --git a/docs/misc/xen-command-line.markdown > b/docs/misc/xen-command-line.markdown > index 152ae03..00fbae7 100644 > --- a/docs/misc/xen-command-line.markdown > +++ b/docs/misc/xen-command-line.markdown > @@ -1305,6 +1305,13 @@ The optional `keep` parameter causes Xen to continue > using the vga > console even after dom0 has been started. The default behaviour is to > relinquish control to dom0. > > +### vmfunc (Intel) > +> `= ` > + > +> Default: `true` > + > +Use VMFUNC and #VE support if available. > + > ### vpid (Intel) > > `= ` > > diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c > index 9d8033e..4274e92 100644 > --- a/xen/arch/x86/hvm/vmx/vmcs.c > +++ b/xen/arch/x86/hvm/vmx/vmcs.c > @@ -50,6 +50,9 @@ boolean_param("unrestricted_guest", > opt_unrestricted_guest_enabled); > static bool_t __read_mostly opt_apicv_enabled = 1; > boolean_param("apicv", opt_apicv_enabled); > > +static bool_t __read_mostly opt_vmfunc_enabled = 1; > +boolean_param("vmfunc", opt_vmfunc_enabled); Please can experimental features be off by default. (I am specifically looking to avoid the issues we had with apicv getting into stable releases despite reliably causing problems for migration). I suspect you will have many interested testers for this featureset, and it is fine to patch the default later when the feature gets declared stable. I also wonder whether it might be better to have a "vmx=" command line parameter with "vmfunc" as a subopt, to save gaining an ever increasing set of related top level parameters? Other than this, the content of the rest of the patch appears fine. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c
Commit b81975eade8c ("x86, irq: Clean up irqdomain transition code") breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke setup_IO_APIC(), so no irqdomains created for IOAPICs and mp_map_pin_to_irq() fails at the very beginning. Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize irqdomain for IOAPICs. Signed-off-by: Jiang Liu --- Hi Sander, Could you please help to test this patch against 3.19-rc3? I have reworked it based Konrad's suggestions. Thanks! Gerry --- arch/x86/kernel/apic/io_apic.c | 31 +++ arch/x86/xen/smp.c |1 + 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3f5f60406ab1..81d4faeb8040 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -38,6 +38,8 @@ #include #include +#include + #include #include #include @@ -2165,6 +2167,9 @@ static inline void __init check_timer(void) unsigned long flags; int no_pin1 = 0; + if (!nr_legacy_irqs()) + return; + local_irq_save(flags); /* @@ -2185,6 +2190,13 @@ static inline void __init check_timer(void) apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); + /* +* legacy_pic will be changed to null_legacy_pic if init() fails to +* to detect legacy PIC hardware. Recheck again. +*/ + if (!nr_legacy_irqs()) + goto out; + pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); pin2 = ioapic_i8259.pin; @@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx) ioapics[idx].pin_info = NULL; } +static void setup_IO_APIC_IDs(void) +{ + if (xen_domain()) + return; + + x86_init.mpparse.setup_ioapic_ids(); + sync_Arb_IDs(); +} + void __init setup_IO_APIC(void) { int ioapic; @@ -2382,16 +2403,10 @@ void __init setup_IO_APIC(void) for_each_ioapic(ioapic) BUG_ON(mp_irqdomain_create(ioapic)); - /* - * Set up IO-APIC IRQ routing. - */ - x86_init.mpparse.setup_ioapic_ids(); - - sync_Arb_IDs(); + setup_IO_APIC_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - if (nr_legacy_irqs()) - check_timer(); + check_timer(); ioapic_initialized = 1; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4c071aeb8417..9f404df64422 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -327,6 +327,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) xen_raw_printk(m); panic(m); } + setup_IO_APIC(); xen_init_lock_cpu(0); smp_store_boot_cpu_info(); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
> > > Use the 'xl pci-attach $DomU $BDF' command to attach more than one > > > PCI devices to the guest, then detach the devices with 'xl > > > pci-detach $DomU $BDF', after that, re-attach these PCI devices > > > again, an error message will be reported like following: > > > > > > libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive an > > > error message from QMP server: Duplicate ID 'pci-pt-03_10.1' > > > for device. > > > > > > The count of calling xen_pt_region_add and xen_pt_region_del are not > > > the same will cause the XenPCIPassthroughState and it's related > > > QemuOpts object not be released properly. > > > > Thanks for the patch! > > > > From this description, I don't quite understand why the > > memory_region_ref and memory_region_unref calls are wrong. What do > > you mean by "The count of calling xen_pt_region_add and > > xen_pt_region_del are not the same"? I means for some memory regions , only the xen_pt_region_add callback function was called while the xen_pt_region_del was not called. > > On unplug xen_pt_region_del does not get called? > > Or the memory region argument is not exactly the same as the one > > initially passed to xen_pt_region_add? > > > > agree. Liang, could you elaborate how the patch is associated with above > explanation? :-) I have verified the following patch can work too: diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d) } out: -memory_listener_register(&s->memory_listener, &address_space_memory); +memory_listener_register(&s->memory_listener, + &s->dev.bus_master_as); memory_listener_register(&s->io_listener, &address_space_io); XEN_PT_LOG(d, "Real physical device %02x:%02x.%d registered successfully!\n", By further debugging, I found when using 'address_space_memory', 'xen_pt_region_del' won't be called when the memory region's name is not ' xen-pci-pt-*', when using ' s->dev.bus_master_as ', there is no such issue. I think use the device related address space here is more reasonable, but I am not sure. Could you give some suggestion? Liang ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote: > 3) report-sel vs. report-all One thing I'm not clear on is whether you are suggesting to reserve RMRR (either -all or -sel) for every domain by default, or whether the guest CFG will need to explicitly opt-in, IOW is there a 3rd report-none option which is the default unless otherwise requested (e.g. by e820_host=1, or some other new option)? Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections
On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote: > - don't bail when using the last slot of bootinfo.mem.bank[] (due to > premature incrementing of the array index) > - GUIDs should be static const (and placed into .init.* whenever > possible) > - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly > append one to the message passed to the function - Avoid needless use of DisplayUint via __stringify. > Signed-off-by: Jan Beulich Acked-by: Ian Campbell Will you commit yourself or would you like me to? Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
> From: Ian Campbell [mailto:ian.campb...@citrix.com] > Sent: Monday, January 12, 2015 9:42 PM > > On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote: > > 3) report-sel vs. report-all > > One thing I'm not clear on is whether you are suggesting to reserve RMRR > (either -all or -sel) for every domain by default, or whether the guest > CFG will need to explicitly opt-in, IOW is there a 3rd report-none > option which is the default unless otherwise requested (e.g. by > e820_host=1, or some other new option)? only when a device is assigned (or potentially prepare for hotplug usage), and report-all/sel is from hypervisor p.o.v to tell userspace about all RMRR regions on this platform or just RMRR regions belonging to specified devices. 'e820_host' only makes holes to prepare (more than RMRR requires). finally we still need query actual reserved regions reported by hypervisor and then mark them reserved in guest e820 table and avoid conflict for PCI BAR allocation etc. Thanks Kevin ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [RFC V9 2/4] domain snapshot overview
On Mon, 2015-01-12 at 00:01 -0700, Chun Yan Liu wrote: > > >>> On 1/8/2015 at 08:26 PM, in message > >>> <1420719995.19787.62.ca...@citrix.com>, Ian > Campbell wrote: > > On Mon, 2014-12-22 at 20:42 -0700, Chun Yan Liu wrote: > > > > > > >>> On 12/19/2014 at 06:25 PM, in message > > <1418984720.20028.15.ca...@citrix.com>, > > > Ian Campbell wrote: > > > > On Thu, 2014-12-18 at 22:45 -0700, Chun Yan Liu wrote: > > > > > > > > > > >>> On 12/18/2014 at 11:10 PM, in message > > > > <1418915443.11882.86.ca...@citrix.com>, > > > > > Ian Campbell wrote: > > > > > > On Tue, 2014-12-16 at 14:32 +0800, Chunyan Liu wrote: > > > > > > > Changes to V8: > > > > > > > * add an overview document, so that one can has a overall look > > > > > > > > > > > > > > about the whole domain snapshot work, limits, requirements, > > > > > > > how to do, etc. > > > > > > > > > > > > > > = > > > > > > > > > > > > > > Domain snapshot overview > > > > > > > > > > > > I don't see a similar section for disk snapshots, are you not > > > > > > considering those here except as a part of a domain snapshot or is > > > > > > this > > > > > > > > an oversight? > > > > > > > > > > > > There are three main use cases (that I know of at least) for > > > > > > snapshotting like behaviour. > > > > > > > > > > > > One is as you've mentioned below for "backup", i.e. to preserve the > > > > > > VM > > > > > > at a certain point in time in order to be able to roll back to it. > > > > > > Is > > > > > > this the only usecase you are considering? > > > > > > > > > > Yes. I didn't take disk snapshot thing into the scope. > > > > > > > > > > > > > > > > > A second use case is to support "gold image" type deployments, i.e. > > > > > > > > > > > > where you create one baseline single disk image and then clone it > > > > > > multiple times to deploy lots of guests. I think this is usually a > > > > > > "disk > > > > > > > > snapshot" type thing, but maybe it can be implemented as restoring > > > > > > a > > > > > > gold domain snapshot multiple times (e.g. for start of day > > > > > > performance > > > > > > reasons). > > > > > > > > > > As we initially discussed about the thing, disk snapshot thing can be > > > > > > > done > > > > > be existing tools directly like qemu-img, vhd-util. > > > > > > > > I was reading this section as a more generic overview of snapshotting, > > > > without reference to where/how things might ultimately be implemented. > > > > > > > > From a design point of view it would be useful to cover the various use > > > > > > > > cases, even if the solution is that the user implements them using CLI > > > > tools by hand (xl) or the toolstack does it for them internally > > > > (libvirt). > > > > > > > > This way we can more clearly see the full picture, which allows us to > > > > validate that we are making the right choices about what goes where. > > > > > > OK. I see. I think this user case is more like how to use the snapshot, > > rather > > > than how to implement snapshot. Right? > > > > Correct, what the user is actually trying to achieve with the > > functionality. > > > > > 'Gold image' or 'Gold domain', the needed work is more like cloning > > > disks. > > > > Yes, or resuming multiple times. > > I see. But IMO it doesn't need change in snapshot design and implementation. > Even resuming multiple times, they couldn't use the same image but duplicate > the image multiple times. Perhaps, but the use case should be included so that this rationale for not worrying about it can be written down (so that people like me don't keep asking...) > > > > > > > > > The third case, (which is similar to the first), is taking a disk > > > > > > snapshot in order to be able to run you usual backup software on > > > > > > the > > > > > > snapshot (which is now unchanging, which is handy) and then > > > > > > deleting the > > > > > > > > disk snapshot (this differs from the first case in which disk is > > > > > > active > > > > > > > > after the snapshot, and due to the lack of the memory part). > > > > > > > > > > Sorry, I'm still not quite clear about what this user case wants to > > > > > do. > > > > > > > > The user has an active domain which they want to backup, but backup > > > > software often does not cope well if the data is changing under its > > > > feet. > > > > > > > > So the users wants to take a snapshot of the domains disks while > > > > leaving > > > > the domain running, so they can backup that static version of the disk > > > > out of band from the VM itself (e.g. by attaching it to a separate > > > > backup VM). > > > > > > Got it. So that's simply disk-only snapshot when domian is active. As you > > > mentioned below
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote: > On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin wrote: > >> From: George Dunlap > >> Sent: Thursday, January 08, 2015 8:55 PM > >> > >> On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap > >> wrote: > >> > If RMRRs almost always happen up above 2G, for example, then a simple > >> > solution that wouldn't require too much work would be to make sure > >> > that the PCI MMIO hole we specify to libxc and to qemu-upstream is big > >> > enough to include all RMRRs. That would satisfy the libxc and qemu > >> > requirements. > >> > > >> > If we then store specific RMRRs we want included in xenstore, > >> > hvmloader can put them in the e820 map, and that would satisfy the > >> > hvmloader requirement. > >> > >> An alternate thing to do here would be to "properly" fix the > >> qemu-upstream problem, by making a way for hvmloader to communicate > >> changes in the gpfn layout to qemu. > >> > >> Then hvmloader could do the work of moving memory under RMRRs to > >> higher memory; and libxc wouldn't need to be involved at all. > >> > >> I think it would also fix our long-standing issues with assigning PCI > >> devices to qemu-upstream guests, which up until now have only been > >> worked around. > >> > > > > could you elaborate a bit for that long-standing issue? > > So qemu-traditional didn't particularly expect to know the guest > memory layout. qemu-upstream does; it expects to know what areas of > memory are guest memory and what areas of memory are unmapped. If a > read or write happens to a gpfn which *xen* knows is valid, but which > *qemu-upstream* thinks is unmapped, then qemu-upstream will crash. > > The problem though is that the guest's memory map is not actually > communicated to qemu-upstream in any way. Originally, qemu-upstream > was only told how much memory the guest had, and it just "happens" to > choose the same guest memory layout as the libxc domain builder does. > This works, but it is bad design, because if libxc were to change for > some reason, people would have to simply remember to also change the > qemu-upstream layout. > > Where this really bites us is in PCI pass-through. The default <4G > MMIO hole is very small; and hvmloader naturally expects to be able to > make this area larger by relocating memory from below 4G to above 4G. > It moves the memory in Xen's p2m, but it has no way of communicating > this to qemu-upstream. So when the guest does an MMIO instuction that > causes qemu-upstream to access that memory, the guest crashes. > > There are two work-arounds at the moment: > 1. A flag which tells hvmloader not to relocate memory > 2. The option to tell qemu-upstream to make the memory hole larger. > > Both are just work-arounds though; a "proper fix" would be to allow > hvmloader some way of telling qemu that the memory has moved, so it > can update its memory map. > > This will (I'm pretty sure) have an effect on RMRR regions as well, > for the reasons I've mentioned above: whether make the "holes" for the > RMRRs in libxc or in hvmloader, if we *move* that memory up to the top > of the address space (rather than, say, just not giving that RAM to > the guest), then qemu-upstream's idea of the guest memory map will be > wrong, and will probably crash at some point. > > Having the ability for hvmloader to populate and/or move the memory > around, and then tell qemu-upstream what the resulting map looked > like, would fix both the MMIO-resize issue and the RMRR problem, wrt > qemu-upstream. > Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz? You can now specify the mmio_hole size for HVM guests when using qemu-upstream: http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List "Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config, which allows configuring the MMIO size below 4GB. " "Backport pc & q35: Add new machine opt max-ram-below-4g": http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458 "x86: hvm: Allow configuration of the size of the mmio_hole.": http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7 -- Pasi > -George > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, 2015-01-12 at 13:53 +, Tian, Kevin wrote: > > From: Ian Campbell [mailto:ian.campb...@citrix.com] > > Sent: Monday, January 12, 2015 9:42 PM > > > > On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote: > > > 3) report-sel vs. report-all > > > > One thing I'm not clear on is whether you are suggesting to reserve RMRR > > (either -all or -sel) for every domain by default, or whether the guest > > CFG will need to explicitly opt-in, IOW is there a 3rd report-none > > option which is the default unless otherwise requested (e.g. by > > e820_host=1, or some other new option)? > > only when a device is assigned (or potentially prepare for hotplug usage), How is this triggered though? Via pci= non-empty in the guest CFG? This sounds like it should behave similarly (in terms of when it is enabled, not necessarily in functionality) to the e820_host boolean, which is set by default if the pci= list is not empty, or can be overridden if the host admin anticipates doing hotplug. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections
>>> On 12.01.15 at 14:46, wrote: > On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote: >> - don't bail when using the last slot of bootinfo.mem.bank[] (due to >> premature incrementing of the array index) >> - GUIDs should be static const (and placed into .init.* whenever >> possible) >> - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly >> append one to the message passed to the function > - Avoid needless use of DisplayUint via __stringify. > >> Signed-off-by: Jan Beulich > > Acked-by: Ian Campbell > > Will you commit yourself or would you like me to? I will. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [qemu-mainline bisection] complete test-amd64-amd64-xl-qemuu-win7-amd64
branch xen-unstable xen branch xen-unstable job test-amd64-amd64-xl-qemuu-win7-amd64 test windows-install Tree: linux git://xenbits.xen.org/linux-pvops.git Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git Tree: qemuu git://git.qemu.org/qemu.git Tree: xen git://xenbits.xen.org/xen.git *** Found and reproduced problem changeset *** Bug is in tree: qemuu git://git.qemu.org/qemu.git Bug introduced: 49d2e648e8087d154d8bf8b91f27c8e05e79d5a6 Bug not present: 60fb1a87b47b14e4ea67043aa56f353e77fbd70a commit 49d2e648e8087d154d8bf8b91f27c8e05e79d5a6 Author: Marcel Apfelbaum Date: Tue Dec 16 16:58:05 2014 + machine: remove qemu_machine_opts global list QEMU has support for options per machine, keeping a global list of options is no longer necessary. Signed-off-by: Marcel Apfelbaum Reviewed-by: Alexander Graf Reviewed-by: Greg Bellows Message-id: 1418217570-15517-2-git-send-email-marce...@redhat.com Signed-off-by: Peter Maydell For bisection revision-tuple graph see: http://www.chiark.greenend.org.uk/~xensrcts/results/bisect.qemu-mainline.test-amd64-amd64-xl-qemuu-win7-amd64.windows-install.html Revision IDs in each graph node refer, respectively, to the Trees above. Searching for failure / basis pass: 33123 fail [host=itch-mite] / 32598 ok. Failure / basis pass flights: 33123 / 32598 (tree with no url: seabios) Tree: linux git://xenbits.xen.org/linux-pvops.git Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git Tree: qemuu git://git.qemu.org/qemu.git Tree: xen git://xenbits.xen.org/xen.git Latest 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b Basis pass 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 7e58e2ac7778cca3234c33387e49577bb7732714 36174af3fbeb1b662c0eadbfa193e77f68cc955b Generating revisions with ./adhoc-revtuple-generator git://xenbits.xen.org/linux-pvops.git#83a926f7a4e39fb6be0576024e67fe161593defa-83a926f7a4e39fb6be0576024e67fe161593defa git://xenbits.xen.org/osstest/linux-firmware.git#c530a75c1e6a472b0eb9558310b518f0dfcd8860-c530a75c1e6a472b0eb9558310b518f0dfcd8860 git://xenbits.xen.org/staging/qemu-xen-unstable.git#b0d42741f8e9a00854c3b3faca1da84bfc69bf22-b0d42741f8e9a00854c3b3faca1da84bfc69bf22 git://git.qemu.org/qemu.git#7e58e2ac7778cca3234c33387e49577bb7732714-ab0302ee764fd702465aef6d88612cdff4302809 git://xenbits.xen.org/xen.git#36174af3fbeb1b662c0eadbfa193e77f68cc955b-36174af3fbeb1b662c0eadbfa193e77f68cc955b + exec + sh -xe + cd /export/home/osstest/repos/qemu + git remote set-url origin git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git + git fetch -p origin +refs/heads/*:refs/remotes/origin/* + exec + sh -xe + cd /export/home/osstest/repos/qemu + git remote set-url origin git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git + git fetch -p origin +refs/heads/*:refs/remotes/origin/* Loaded 1005 nodes in revision graph Searching for test results: 32585 pass irrelevant 32598 pass 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 7e58e2ac7778cca3234c33387e49577bb7732714 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32611 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32626 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32689 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32659 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32876 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32854 fail 83a926f7a4e39fb6be0576024e67fe161593defa c530a75c1e6a472b0eb9558310b518f0dfcd8860 b0d42741f8e9a00854c3b3faca1da84bfc69bf22 ab0302ee764fd702465aef6d88612cdff4302809 36174af3fbeb1b662c0eadbfa193e77f68cc955b 32908 fail 83a926f7a4e39fb6be0576024e67fe161
Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects
On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote: > In particular the .rodata.str2 case is relevant for the EFI boot code > (moving around 3k from permanent to init-time sections). So we go from handling .rodata.str1.{1,2,3,8} to .rodata.str{1,2,4}.{1,2,4,8,16}? > Signed-off-by: Jan Beulich Acked-by: Ian Campbell ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects
>>> On 12.01.15 at 15:03, wrote: > On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote: >> In particular the .rodata.str2 case is relevant for the EFI boot code >> (moving around 3k from permanent to init-time sections). > > So we go from handling .rodata.str1.{1,2,3,8} > to .rodata.str{1,2,4}.{1,2,4,8,16}? These plus .rodata.cst{1,2,4,8,16}. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 12:28 PM, Tian, Kevin wrote: >> From: George Dunlap >> Sent: Monday, January 12, 2015 8:14 PM >> >> On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin wrote: >> >> From: Jan Beulich [mailto:jbeul...@suse.com] >> >> Sent: Monday, January 12, 2015 6:23 PM >> >> >> >> >>> On 12.01.15 at 11:12, wrote: >> >> >> From: Jan Beulich [mailto:jbeul...@suse.com] >> >> >> Sent: Monday, January 12, 2015 6:09 PM >> >> >> >> >> >> >>> On 12.01.15 at 10:56, wrote: >> >> >> > the result is related to another open whether we want to block guest >> >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we >> >> >> > don't need to change lowmem for such rare 1GB case, just throws >> >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't >> >> >> > assign it). >> >> >> >> >> >> And how would you then deal with the one guest needing that >> >> >> range reserved? >> >> > >> >> > if guest needs the range, then report-all or report-sel doesn't matter. >> >> > domain builder throws the warning, and later device assignment will >> >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such >> >> > assumption to simplify implementation is reasonable. >> >> >> >> One of my main problems with all you recent argumentation here >> >> is the arbitrary use of the 1Gb boundary - there's nothing special >> >> in this discussion with where the boundary is. Everything revolves >> >> around the (undue) effect of report-all on domains not needing all >> >> of the ranges found on the host. >> >> >> > >> > I'm not sure which part of my argument is not clear here. report-all >> > would be a problem here only if we want to fix all the conflictions >> > (then pulling unnecessary devices increases the confliction possibility) >> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB) >> > while warn other conflictions (e.g. <3G) in domain builder (let later >> > assignment path to actually fail if confliction does matter), then we >> > don't need to solve all conflictions in domain builder (if say 1G example >> > fixing it may instead reduce lowmem greatly) and then report-all >> > may just add more warnings than report-sel for unused devices. >> >> You keep saying "report-all" or "report-sel", but I'm not 100% clear >> what you mean by those. In any case, the naming has got to be a bit >> misleading: the important questions at the moment, AFAICT, are: > > I explained them in original proposal Yes, I read it and didn't understand it there either. :-) >> 1. Whether we make holes at boot time for all RMRRs on the system, or >> whether only make RMRRs for some subset (or potentially some other >> arbitrary range, which may include RMRRs on other hosts to which we >> may want to migrate). > > I use 'report-all' to stand for making holes for all RMRRs on the system, > while 'report-sel' for specified subset. > > including other RMRRs (from admin for migration) is orthogonal to > above open. Right; so the "report" in this case is "report to the guest". As I said, I think that's confusing terminology; after all, we want to report to the guest all holes that we make, and only the holes that we make. The question isn't then which ones we report, but which ones we make holes for. :-) So for this discussion, maybe "rmrr-host" (meaning, copy RMRRs from the host) or "rmrr-sel" (meaning, specify a selection of RMRRs, which may be from this host, or even another host)? Given that the ranges may be of arbitrary size, and that we may want to specify additional ranges for migration to other hosts, then I think we need at some level we need the machinery to be in place to specify the RMRRs that will be reserved for a specific guest. At the xl level, there should of course be a way to specify "use all host RMRRs"; but what should happen then is that xl / libxl should query Xen for the host RMRRs and then pass those down to the next layer of the library. >> 2. Whether those holes are made by the domain builder in libxc, or by >> hvmloader > > based on current discussion, whether to make holes in hvmloader > doesn't bring fundamental difference. as long as domain builder > still need to populate memory (even minimal for hvmloader to boot), > it needs to check conflict and may ideally make hole too (though we > may make assumption not doing that) Well it will have an impact on the overall design of the code; but you're right, if RMRRs really can (and will) be anywhere in memory, then the domain builder will need to know what RMRRs are going to be reserved for this VM and avoid populating those. If, on the other hand, we can make some fairly small assumptions about where there will not be any RMRRs, then we can get away with handling everything in hvmloader. >> >> 3. What happens if Xen is asked to assign a device and it finds that >> the required RMRR is not empty: >> a. during guest creation >> b. after the guest has booted > > for Xen we don't need differentiate
Re: [Xen-devel] (v2) Design proposal for RMRR fix
On Mon, Jan 12, 2015 at 1:56 PM, Pasi Kärkkäinen wrote: > On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote: >> So qemu-traditional didn't particularly expect to know the guest >> memory layout. qemu-upstream does; it expects to know what areas of >> memory are guest memory and what areas of memory are unmapped. If a >> read or write happens to a gpfn which *xen* knows is valid, but which >> *qemu-upstream* thinks is unmapped, then qemu-upstream will crash. >> >> The problem though is that the guest's memory map is not actually >> communicated to qemu-upstream in any way. Originally, qemu-upstream >> was only told how much memory the guest had, and it just "happens" to >> choose the same guest memory layout as the libxc domain builder does. >> This works, but it is bad design, because if libxc were to change for >> some reason, people would have to simply remember to also change the >> qemu-upstream layout. >> >> Where this really bites us is in PCI pass-through. The default <4G >> MMIO hole is very small; and hvmloader naturally expects to be able to >> make this area larger by relocating memory from below 4G to above 4G. >> It moves the memory in Xen's p2m, but it has no way of communicating >> this to qemu-upstream. So when the guest does an MMIO instuction that >> causes qemu-upstream to access that memory, the guest crashes. >> >> There are two work-arounds at the moment: >> 1. A flag which tells hvmloader not to relocate memory >> 2. The option to tell qemu-upstream to make the memory hole larger. >> >> Both are just work-arounds though; a "proper fix" would be to allow >> hvmloader some way of telling qemu that the memory has moved, so it >> can update its memory map. >> >> This will (I'm pretty sure) have an effect on RMRR regions as well, >> for the reasons I've mentioned above: whether make the "holes" for the >> RMRRs in libxc or in hvmloader, if we *move* that memory up to the top >> of the address space (rather than, say, just not giving that RAM to >> the guest), then qemu-upstream's idea of the guest memory map will be >> wrong, and will probably crash at some point. >> >> Having the ability for hvmloader to populate and/or move the memory >> around, and then tell qemu-upstream what the resulting map looked >> like, would fix both the MMIO-resize issue and the RMRR problem, wrt >> qemu-upstream. >> > > Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz? > > You can now specify the mmio_hole size for HVM guests when using > qemu-upstream: > http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List > > > "Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config, > which allows configuring the MMIO size below 4GB. " > > "Backport pc & q35: Add new machine opt max-ram-below-4g": > http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458 > > "x86: hvm: Allow configuration of the size of the mmio_hole.": > http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7 Yes -- that's workaround #2 above ("tell qemu-upstream to make the memory hole larger"). But it's still a work-around, because it requires the admin to figure out how big a memory hole he needs. With qemu-traditional, he could just assign whatever devices he wanted, and hvmloader would make it the right size automatically. Ideally that's how it would work for qemu-upstream as well. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed
On 12/01/2015 14:35, Li, Liang Z wrote: > > diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644 > --- a/hw/xen/xen_pt.c > +++ b/hw/xen/xen_pt.c > @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d) > } > > out: > -memory_listener_register(&s->memory_listener, &address_space_memory); > +memory_listener_register(&s->memory_listener, > + &s->dev.bus_master_as); > memory_listener_register(&s->io_listener, &address_space_io); > XEN_PT_LOG(d, > "Real physical device %02x:%02x.%d registered > successfully!\n", > > By further debugging, I found when using 'address_space_memory', > 'xen_pt_region_del' > won't be called when the memory region's name is not ' xen-pci-pt-*', when > using > ' s->dev.bus_master_as ', there is no such issue. > > I think use the device related address space here is more reasonable, but I > am not sure. > Could you give some suggestion? Yes, this patch makes sense. The listener will be called every time the command register is written. Paolo ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [libvirt] [PATCH 09/12] Revert "src/xenconfig: Xen-xl parser"
On 01/10/2015 12:03 AM, Jim Fehlig wrote: > This reverts commit 2c78051a14acfb7aba078d569b1632dfe0ca0853. > > Conflicts: > src/Makefile.am > > Signed-off-by: Jim Fehlig > --- > .gitignore| 1 - > cfg.mk| 3 +- > configure.ac | 1 - > po/POTFILES.in| 1 - > src/Makefile.am | 25 +-- > src/libvirt_xenconfig.syms| 4 - > src/xenconfig/xen_common.c| 3 +- > src/xenconfig/xen_xl.c| 499 > -- > src/xenconfig/xen_xl.h| 33 --- > src/xenconfig/xen_xl_disk.l | 256 -- > src/xenconfig/xen_xl_disk_i.h | 39 > 11 files changed, 4 insertions(+), 861 deletions(-) > OK - so reverting is fine; however, xen_xl_disk.{c,h} still exist... Simple enough solution, but they will show up in someone's git status output since they are also removed from .gitignore. There are a couple Coverity issues from the next 3 patches - I'll note them for those directly. ACK John > diff --git a/.gitignore b/.gitignore > index eac2203..9d09709 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -140,7 +140,6 @@ > /src/remote/*_protocol.[ch] > /src/rpc/virkeepaliveprotocol.[ch] > /src/rpc/virnetprotocol.[ch] > -/src/xenconfig/xen_xl_disk.[ch] > /src/test_libvirt*.aug > /src/test_virtlockd.aug > /src/util/virkeymaps.h > diff --git a/cfg.mk b/cfg.mk > index 3df3dcb..21f83c3 100644 > --- a/cfg.mk > +++ b/cfg.mk > @@ -89,9 +89,8 @@ distdir: sc_vulnerable_makefile_CVE-2012-3386.z > endif > > # Files that should never cause syntax check failures. > -# (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$ > VC_LIST_ALWAYS_EXCLUDE_REGEX = \ > - > (^(HACKING|docs/(news\.html\.in|.*\.patch)|src/xenconfig/xen_xl_disk.[chl])|\.(po|fig|gif|ico|png))$$ > + (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$ > > # Functions like free() that are no-ops on NULL arguments. > useless_free_options = \ > diff --git a/configure.ac b/configure.ac > index 167b875..9d12079 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -146,7 +146,6 @@ m4_ifndef([LT_INIT], [ > ]) > AM_PROG_CC_C_O > AM_PROG_LD > -AM_PROG_LEX > > AC_MSG_CHECKING([for how to mark DSO non-deletable at runtime]) > LIBVIRT_NODELETE= > diff --git a/po/POTFILES.in b/po/POTFILES.in > index 094c8e3..e7cb2cc 100644 > --- a/po/POTFILES.in > +++ b/po/POTFILES.in > @@ -247,7 +247,6 @@ src/xenapi/xenapi_driver.c > src/xenapi/xenapi_utils.c > src/xenconfig/xen_common.c > src/xenconfig/xen_sxpr.c > -src/xenconfig/xen_xl.c > src/xenconfig/xen_xm.c > tests/virpolkittest.c > tools/libvirt-guests.sh.in > diff --git a/src/Makefile.am b/src/Makefile.am > index c7975e5..e0e47d0 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -1000,22 +1000,11 @@ CPU_SOURCES = > \ > VMX_SOURCES =\ > vmx/vmx.c vmx/vmx.h > > -AM_LFLAGS = -Pxl_disk_ --header-file=../$*.h > -LEX_OUTPUT_ROOT = lex.xl_disk_ > -BUILT_SOURCES += xenconfig/xen_xl_disk.c xenconfig/xen_xl_disk.h > -# Generated header file is not implicitly added to dist > -EXTRA_DIST += xenconfig/xen_xl_disk.h > -CLEANFILES += xenconfig/xen_xl_disk.h xenconfig/xen_xl_disk.c > - > -XENXLDISKPARSER_SOURCES = xenconfig/xen_xl_disk.l > - > XENCONFIG_SOURCES = \ > xenconfig/xenxs_private.h \ > - xenconfig/xen_common.c xenconfig/xen_common.h \ > + xenconfig/xen_common.c xenconfig/xen_common.h \ > xenconfig/xen_sxpr.c xenconfig/xen_sxpr.h \ > - xenconfig/xen_xm.c xenconfig/xen_xm.h \ > - xenconfig/xen_xl.c xenconfig/xen_xl.h \ > - xenconfig/xen_xl_disk_i.h > + xenconfig/xen_xm.c xenconfig/xen_xm.h > > pkgdata_DATA = cpu/cpu_map.xml > > @@ -1070,19 +1059,10 @@ libvirt_vmx_la_SOURCES = $(VMX_SOURCES) > endif WITH_VMX > > if WITH_XENCONFIG > -# Flex generated XL disk parser needs to be compiled without WARN_FLAGS > -# Add the generated object to its own library to control CFLAGS > -noinst_LTLIBRARIES += libvirt_xenxldiskparser.la > -libvirt_xenxldiskparser_la_CFLAGS = \ > - -I$(srcdir)/conf $(AM_CFLAGS) -Wno-unused-parameter > -libvirt_xenxldiskparser_la_SOURCES = \ > - $(XENXLDISKPARSER_SOURCES) > - > noinst_LTLIBRARIES += libvirt_xenconfig.la > libvirt_la_BUILT_LIBADD += libvirt_xenconfig.la > libvirt_xenconfig_la_CFLAGS = \ > -I$(srcdir)/conf $(AM_CFLAGS) > -libvirt_xenconfig_la_LIBADD = libvirt_xenxldiskparser.la > libvirt_xenconfig_la_SOURCES = $(XENCONFIG_SOURCES) > endif WITH_XENCONFIG > > @@ -1844,7 +1824,6 @@ EXTRA_DIST += > \ > $(VBOX_DRIVER
Re: [Xen-devel] [PATCH v2 1/3] x86: also allow REP STOS emulation acceleration
On Fri, Jan 9, 2015 at 12:10 PM, Jan Beulich wrote: On 09.01.15 at 12:45, wrote: >> At 11:24 + on 09 Jan (1420799087), Jan Beulich wrote: >>> >>> On 09.01.15 at 12:18, wrote: >>> >> > +default: >>> >> > +xfree(buf); >>> >> > +ASSERT(!buf); >>> > >>> > looks dodgy... >>> >>> In which way? The "default" is supposed to be unreachable, and sits >>> in the else branch to an if(!buf), i.e. in a release build we'll correctly >>> free the buffer, while in a debug build the ASSERT() will trigger. >> >> Oh I see. Can you please use ASSERT(0) for that? > > I sincerely dislike ASSERT(0), but if that's the only way to get > the patch accepted... Um, by what set of criteria is ASSERT(!buf) better than ASSERT(0)? At least the second tells the reader that you're not using ASSERT() the normal way. I agree that ASSERT_UNREACHABLE() is probably the best option. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [Xen-users] [TestDay] minor bug + possible configuration bug 4.5rc4 archlinux
Adding some CC's, including the devel list. On Fri, 2015-01-09 at 19:49 -0600, Doug McMillan wrote: > > > configuration(?) > I compiled booted straight from bios with xen.efi during boot I received > several errors. > xl info works (see attachment). XL list locks the terminal session. Checking > into the errors I > found the following under dmesg (full attached): > > > Ignoring BGRT: invalid status 0 (expected 1) > ACPI: SCI (ACPI GSI 9) not registered > kvm: no hardware support > mce: Unable to init device /dev/mcelog (rc: -16) > > > systemd[1]: Set hostname to . > systemd[1]: var-lib-xenstored.mount's Where= setting doesn't match unit > name. Refusing. I suppose this error message is accurate and the Where field is not "/var/lib/xenstored"? What is it actually? What options did you pass to ./configure when building Xen? This seems like a silly misfeature of systemd to me, but I suppose the fix is to rename the thing to match, except that would require all the dependent units to have the field changed too. It might turn out to be easier to instead arrange for @XEN_LIB_STORED@ to == /var/lib/xenstored. @devs -- we obviously need to do something about this (too late for 4.5, but for 4.6 + backport). Perhaps there is some alternative systemd construction which disassociates the actual path from the abstract service "xenstored dir mounted"? Otherwise we'll have to somehow arrange for the file and everything which depends on it to have a name which reflects the actual mount path, which sounds pretty awful to me... Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c
On 12/01/15 13:39, Jiang Liu wrote: > Commit b81975eade8c ("x86, irq: Clean up irqdomain transition code") > breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke > setup_IO_APIC(), so no irqdomains created for IOAPICs and > mp_map_pin_to_irq() fails at the very beginning. > > Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize > irqdomain for IOAPICs. Having Xen call setup_IO_APIC() to initialize the irq domains then having to add special cases to it is just wrong. The bits of init deferred by mp_register_apic() are also deferred to two different places which looks odd. What about something like the following (untested) patch? diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3f5f604..e180680 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -253,8 +253,10 @@ int __init arch_early_ioapic_init(void) if (!nr_legacy_irqs()) io_apic_irqs = ~0UL; - for_each_ioapic(i) + for_each_ioapic(i) { + BUG_ON(mp_irqdomain_create(ioapic)); alloc_ioapic_saved_registers(i); + } /* * For legacy IRQ's, start with assigning irq0 to irq15 to @@ -2379,8 +2381,6 @@ void __init setup_IO_APIC(void) io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - for_each_ioapic(ioapic) - BUG_ON(mp_irqdomain_create(ioapic)); /* * Set up IO-APIC IRQ routing. @@ -2929,7 +2929,8 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base, /* * If mp_register_ioapic() is called during early boot stage when * walking ACPI/SFI/DT tables, it's too early to create irqdomain, -* we are still using bootmem allocator. So delay it to setup_IO_APIC(). +* we are still using bootmem allocator. So delay it to +* arch_early_ioapic_init(). */ if (hotplug) { if (mp_irqdomain_create(idx)) { > --- a/arch/x86/kernel/apic/io_apic.c > +++ b/arch/x86/kernel/apic/io_apic.c > @@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx) > ioapics[idx].pin_info = NULL; > } > > +static void setup_IO_APIC_IDs(void) > +{ > + if (xen_domain()) > + return; This would have to xen_pv_domain(). David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [seabios test] 33359: regressions - FAIL
flight 33359 seabios real [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/33359/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: test-amd64-amd64-xl-qemuu-debianhvm-amd64 5 xen-boot fail REGR. vs. 33317 Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-pvh-intel 9 guest-start fail never pass test-amd64-i386-libvirt 9 guest-start fail never pass test-amd64-amd64-libvirt 9 guest-start fail never pass test-amd64-amd64-xl-pvh-amd 9 guest-start fail never pass test-amd64-amd64-xl-pcipt-intel 9 guest-start fail never pass test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-winxpsp3 14 guest-stop fail never pass version targeted for testing: seabios 301dd092c2d04a5d70c94b9d873d810785e94a84 baseline version: seabios 60e0e55f212dadd043ab9e39bee05a48013ddd8f People who touched revisions under test: Kevin O'Connor jobs: build-amd64 pass build-i386 pass build-amd64-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-i386-pvops pass test-amd64-amd64-xl pass test-amd64-i386-xl pass test-amd64-amd64-xl-pvh-amd fail test-amd64-i386-rhel6hvm-amd pass test-amd64-i386-qemut-rhel6hvm-amd pass test-amd64-i386-qemuu-rhel6hvm-amd pass test-amd64-amd64-xl-qemut-debianhvm-amd64pass test-amd64-i386-xl-qemut-debianhvm-amd64 pass test-amd64-amd64-xl-qemuu-debianhvm-amd64fail test-amd64-i386-xl-qemuu-debianhvm-amd64 pass test-amd64-i386-freebsd10-amd64 pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass test-amd64-i386-xl-qemuu-ovmf-amd64 pass test-amd64-amd64-xl-qemut-win7-amd64 fail test-amd64-i386-xl-qemut-win7-amd64 fail test-amd64-amd64-xl-qemuu-win7-amd64 fail test-amd64-i386-xl-qemuu-win7-amd64 fail test-amd64-amd64-xl-win7-amd64 fail test-amd64-i386-xl-win7-amd64fail test-amd64-i386-xl-credit2 pass test-amd64-i386-freebsd10-i386 pass test-amd64-amd64-xl-pcipt-intel fail test-amd64-amd64-xl-pvh-intelfail test-amd64-i386-rhel6hvm-intel pass test-amd64-i386-qemut-rhel6hvm-intel pass test-amd64-i386-qemuu-rhel6hvm-intel pass test-amd64-amd64-libvirt fail test-amd64-i386-libvirt fail test-amd64-i386-xl-multivcpu pass test-amd64-amd64-pairpass test-amd64-i386-pair pass test-amd64-amd64-xl-sedf-pin pass test-amd64-
Re: [Xen-devel] [PATCH 00/12] Replace Xen xl parsing/formatting impl
On Fri, 2015-01-09 at 22:03 -0700, Jim Fehlig wrote: > The first attempt to implement support for parsing/formatting Xen's > xl disk config format copied Xen's flex-based parser into libvirt, which > has proved to be challenging in the context of autotools. But as it turns > out, Xen provides an interface to the parser via libxlutil. > > This series reverts the first attempt, along with subsequent attempts to > fix it, and replaces it with an implementation based on libxlutil. The > first nine patches revert the original implementation and subsequent fixes. > Patch 10 provides an implemenation based on libxlutil. Patches 11 and > 12 are basically unchanged from patches 3 and 4 in the first attempt. > > One upshot of using libxlutil instead of copying the flex source is > removing the potential for source divergence. Thanks for doing this, looks good to me, FWIW. Is the presence/absence of xen-xl support exposed via virsh anywhere? If so then I can arrange for my Xen osstest patches for libvirt testing to use xen-xl when available but still fallback to xen-xm. I've had a look in "virsh capabilities" and "virsh help domxml-from-native" but not seeing xen-xm, so assuming xen-xl won't magically appear in any of those places either. (TBH, this may become moot since I suspect your patches will be well established by the time my osstest patches hit osstest...) Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c
El 12/01/15 a les 14.04, David Vrabel ha escrit: > On 12/01/15 11:36, Roger Pau Monné wrote: >> El 12/01/15 a les 8.09, Bob Liu ha escrit: >>> >>> On 01/09/2015 11:51 PM, Roger Pau Monné wrote: El 06/01/15 a les 14.19, Bob Liu ha escrit: > When there is no enough free grants, gnttab_alloc_grant_references() > will fail and block request queue will stop. > If the system is always lack of grants, blkif_restart_queue_callback() > can't be > scheduled and block request queue can't be restart(block I/O hang). > > But when there are former requests complete, some grants may free to > persistent_gnts_c, we can give the request queue another chance to > restart and > avoid block hang. > > Reported-by: Junxiao Bi > Signed-off-by: Bob Liu > --- > drivers/block/xen-blkfront.c | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index 2236c6f..dd30f99 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, > struct blkfront_info *info, > } > } > } > + > + /* > + * Request queue would be stopped if failed to alloc enough grants and > + * won't be restarted until gnttab_free_count >= info->callback->count. > + * > + * But there is another case, once we have enough persistent grants we > + * can try to restart the request queue instead of continue to wait for > + * 'gnttab_free_count'. > + */ > + if (info->persistent_gnts_c >= info->callback.count) > + schedule_work(&info->work); I guess I'm missing something here, but blkif_completion is called by blkif_interrupt, which in turn calls kick_pending_request_queues when finished, which IMHO should be enough to restart the processing of requests. >>> >>> You are right, sorry for the mistake. >>> >>> The problem we met was a xenblock I/O hang. >>> Dumped data showed at that time info->persistent_gnt_c = 8, max_gref = 8 >>> but block request queue was still stopped. >>> It's very hard to reproduce this issue, we only see it once. >>> >>> I think there might be a race condition: >>> >>> request A request B: >>> >>>info->persistent_gnts_c < max_grefs >>>and fail to alloc enough grants >>> >>> >>> >>> interrupt happen, blkif_complte(): >>> info->persistent_gnts_c++ >>> kick_pending_request_queues() blkif_interrupt can never interrupt blkif_queue_request, because it's holding a spinlock (info->io_lock). If you have seen this trace in the wild it means something is really wrong and we are calling blkif_queue_request without acquiring the spinlock and thus without disabling interrupts. >>> >>> stop block request queue >>> added to callback list >>> >>> If the system don't have enough grants(but have enough persistent_gnts), >>> request queue would still hang. >> >> Not sure how can this happen, blkif_queue_request explicitly checks that >> persistent_gnts_c < max_grefs before adding the callback and stopping >> the request queue, so in your case the queue should not be blocked. Can >> you dump the state of info->connected? > > I think Bob has correctly identified a race. > > After calling blk_stop_queue(), check info->persistent_gnts again and > restart the queue if there free grefs. > > David > > ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [xen-4.5-testing test] 33348: tolerable FAIL - PUSHED
flight 33348 xen-4.5-testing real [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/33348/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-pvh-intel 9 guest-start fail never pass test-amd64-amd64-xl-pvh-amd 9 guest-start fail never pass test-armhf-armhf-libvirt 9 guest-start fail never pass test-armhf-armhf-xl 10 migrate-support-checkfail never pass test-amd64-amd64-libvirt 9 guest-start fail never pass test-amd64-amd64-xl-pcipt-intel 9 guest-start fail never pass test-amd64-i386-libvirt 9 guest-start fail never pass test-amd64-i386-xl-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass test-amd64-i386-xl-win7-amd64 14 guest-stop fail never pass test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop fail never pass test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop fail never pass test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail never pass test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop fail never pass test-amd64-amd64-xl-winxpsp3 14 guest-stop fail never pass version targeted for testing: xen 3508d75155dae3154d10cd5c33d44336f6f4bf1c baseline version: xen 36174af3fbeb1b662c0eadbfa193e77f68cc955b People who touched revisions under test: Andrew Cooper Boris Ostrovsky Ed Swierk Ian Campbell Ian Campbell Ian Jackson Ian Jackson Jan Beulich Julien Grall Kevin Tian Konrad Rzeszutek Wilk Mihai DonÈu Olaf Hering Robert Hu RÄzvan Cojocaru Stefano Stabellini Wei Liu Yang Hongyang jobs: build-amd64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-armhf-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-armhf-pvopspass build-i386-pvops pass build-amd64-rumpuserxen pass build-i386-rumpuserxen pass test-amd64-amd64-xl pass test-armhf-armhf-xl pass test-amd64-i386-xl pass test-amd64-amd64-xl-pvh-amd fail test-amd64-i386-rhel6hvm-amd pass test-amd64-i386-qemut-rhel6hvm-amd pass test-amd64-i386-qemuu-rhel6hvm-amd pass test-amd64-amd64-xl-qemut-debianhvm-amd64pass test-amd64-i386-xl-qemut-debianhvm-amd64 pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-i386-xl-qemuu-debianhvm-amd64 pass test-amd64-i386-freebsd10-amd64 pass test-amd64-amd64-xl-qemuu-ovmf-amd64 pass test-amd64-i386-xl-qemuu-ovmf-amd64 pass test-amd64-amd64-rumpuserxen-amd64 pass test-amd64-amd64-xl-qemut-win7-amd64 fail test-amd64-i386-xl-qemut-win7-amd64 fail test-amd64-amd64-xl-qemuu-win7-amd64 fail test-amd64-i386-xl-qemuu-win7-amd64 fail test-amd64-amd64-xl-win7-amd64 fail test-amd64-i386-xl-win7-amd64fail test-amd64-i386-xl-credit2 pass test-amd64-i386-freebsd10-i386 pass test-amd64-i386-rumpuserxen-i386
[Xen-devel] [PATCH v2] xenstored: log tdb message via xenstored's logging mechanisms
TDB provides us with a callback for this purpose. Use it in both xenstored and xs_tdb_dump. While at it make the existing log() macro tollerate memory failures. Signed-off-by: Ian Campbell --- v2: Use &tdb_logger consistently. Did not: move location of talloc_free, since talloc_free(NULL) returns an error. --- tools/xenstore/xenstored_core.c | 39 +-- tools/xenstore/xs_tdb_dump.c| 12 +++- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c index 4eaff57..3fd9a20 100644 --- a/tools/xenstore/xenstored_core.c +++ b/tools/xenstore/xenstored_core.c @@ -89,9 +89,14 @@ static void check_store(void); #define log(...) \ do {\ char *s = talloc_asprintf(NULL, __VA_ARGS__); \ - trace("%s\n", s); \ - syslog(LOG_ERR, "%s", s); \ - talloc_free(s); \ + if (s) {\ + trace("%s\n", s); \ + syslog(LOG_ERR, "%s", s); \ + talloc_free(s); \ + } else {\ + trace("talloc failure during logging\n"); \ + syslog(LOG_ERR, "talloc failure during logging\n"); \ + } \ } while (0) @@ -1479,13 +1484,35 @@ static void manual_node(const char *name, const char *child) talloc_free(node); } +static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...) +{ + va_list ap; + char *s; + + va_start(ap, fmt); + s = talloc_vasprintf(NULL, fmt, ap); + va_end(ap); + + if (s) { + trace("TDB: %s\n", s); + syslog(LOG_ERR, "TDB: %s", s); + if (verbose) + xprintf("TDB: %s", s); + talloc_free(s); + } else { + trace("talloc failure during logging\n"); + syslog(LOG_ERR, "talloc failure during logging\n"); + } +} + static void setup_structure(void) { char *tdbname; tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb()); if (!(tdb_flags & TDB_INTERNAL)) - tdb_ctx = tdb_open(tdbname, 0, tdb_flags, O_RDWR, 0); + tdb_ctx = tdb_open_ex(tdbname, 0, tdb_flags, O_RDWR, 0, + &tdb_logger, NULL); if (tdb_ctx) { /* XXX When we make xenstored able to restart, this will have @@ -1516,8 +1543,8 @@ static void setup_structure(void) talloc_free(tlocal); } else { - tdb_ctx = tdb_open(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT, - 0640); + tdb_ctx = tdb_open_ex(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT, + 0640, &tdb_logger, NULL); if (!tdb_ctx) barf_perror("Could not create tdb file %s", tdbname); diff --git a/tools/xenstore/xs_tdb_dump.c b/tools/xenstore/xs_tdb_dump.c index b91cdef..9f636f9 100644 --- a/tools/xenstore/xs_tdb_dump.c +++ b/tools/xenstore/xs_tdb_dump.c @@ -33,6 +33,15 @@ static char perm_to_char(enum xs_perm_type perm) '?'; } +static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + int main(int argc, char *argv[]) { TDB_DATA key; @@ -41,7 +50,8 @@ int main(int argc, char *argv[]) if (argc != 2) barf("Usage: xs_tdb_dump "); - tdb = tdb_open(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0); + tdb = tdb_open_ex(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0, + &tdb_logger, NULL); if (!tdb) barf_perror("Could not open %s", argv[1]); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH] Add a flight to test OVMF master branch
Do the usual stuffs for adding a new branch (ap-* cr-* etc). Modify ts-xen-build so that it builds Xen with the specified ovmf tree and revision. Only build and test on x86 by modifying make-flight and mfi-common. New branch is added to cr-daily-branch. It exports the tree and changeset used for the test if set. Otherwise the values in Config.mk are used. Signed-off-by: Wei Liu --- ap-common|5 + ap-fetch-version |4 ap-fetch-version-old |5 + ap-print-url |3 +++ ap-push |5 + cr-daily-branch | 14 ++ cr-for-branches |2 +- cri-common |1 + make-flight |6 ++ mfi-common |5 - ts-xen-build |7 +++ 11 files changed, 55 insertions(+), 2 deletions(-) diff --git a/ap-common b/ap-common index 96cbd14..e2c3e42 100644 --- a/ap-common +++ b/ap-common @@ -51,6 +51,10 @@ : ${PUSH_TREE_SEABIOS:=$XENBITS:/home/xen/git/osstest/seabios.git} : ${BASE_TREE_SEABIOS:=git://xenbits.xen.org/osstest/seabios.git} +: ${TREE_OVMF_UPSTREAM:=https://github.com/tianocore/edk2.git} +: ${PUSH_TREE_OVMF:=$XENBITS:/home/xen/git/osstest/ovmf.git} +: ${BASE_TREE_OVMF:=git://xenbits.xen.org/osstest/ovmf.git} + : ${TREE_LINUXFIRMWARE:=git://xenbits.xen.org/osstest/linux-firmware.git} : ${PUSH_TREE_LINUXFIRMWARE:=$XENBITS:/home/osstest/ext/linux-firmware.git} : ${UPSTREAM_TREE_LINUXFIRMWARE:=$GIT_KERNEL_ORG/pub/scm/linux/kernel/git/firmware/linux-firmware.git} @@ -77,6 +81,7 @@ fi : ${LOCALREV_LIBVIRT:=daily-cron.$branch} : ${LOCALREV_RUMPUSERXEN:=daily-cron.$branch} : ${LOCALREV_SEABIOS:=daily-cron.$branch} +: ${LOCALREV_OVMF:=daily-cron.$branch} : ${TREEBASE_LINUX_XCP:=http://hg.uk.xensource.com/carbon/trunk/linux-2.6.27} diff --git a/ap-fetch-version b/ap-fetch-version index f6c65d8..33aaf00 100755 --- a/ap-fetch-version +++ b/ap-fetch-version @@ -85,6 +85,10 @@ seabios) repo_tree_rev_fetch_git seabios \ $TREE_SEABIOS_UPSTREAM master $LOCALREV_SEABIOS ;; +ovmf) + repo_tree_rev_fetch_git ovmf \ + $TREE_OVMF_UPSTREAM master $LOCALREV_OVMF + ;; osstest) if [ "x$OSSTEST_USE_HEAD" != "xy" ] ; then git fetch $HOME/testing.git pretest:ap-fetch >&2 diff --git a/ap-fetch-version-old b/ap-fetch-version-old index 43c997c..7b7f50e 100755 --- a/ap-fetch-version-old +++ b/ap-fetch-version-old @@ -30,6 +30,7 @@ select_xenbranch : ${BASE_LOCALREV_LIBVIRT:=daily-cron.$branch.old} : ${BASE_LOCALREV_RUMPUSERXEN:=daily-cron.$branch.old} : ${BASE_LOCALREV_SEABIOS:=daily-cron.$branch.old} +: ${BASE_LOCALREV_OVMF:=daily-cron.$branch.old} : ${BASE_TREE_QEMU_UPSTREAM:=${TREE_QEMU_UPSTREAM/\/staging\//\/}} @@ -92,6 +93,10 @@ seabios) repo_tree_rev_fetch_git seabios \ $BASE_TREE_SEABIOS xen-tested-master $BASE_LOCALREV_SEABIOS ;; +ovmf) + repo_tree_rev_fetch_git ovmf \ + $BASE_TREE_OVMF xen-tested-master $BASE_LOCALREV_OVMF + ;; osstest) if [ "x$OSSTEST_USE_HEAD" != "xy" ] ; then git fetch -f $HOME/testing.git incoming:ap-fetch diff --git a/ap-print-url b/ap-print-url index 7b27e1e..3db2375 100755 --- a/ap-print-url +++ b/ap-print-url @@ -58,6 +58,9 @@ rumpuserxen) seabios) echo $TREE_SEABIOS_UPSTREAM ;; +ovmf) + echo $TREE_OVMF_UPSTREAM + ;; osstest) echo none:; ;; diff --git a/ap-push b/ap-push index a2aa747..01089f3 100755 --- a/ap-push +++ b/ap-push @@ -36,6 +36,7 @@ TREE_XEN=$PUSH_TREE_XEN TREE_LIBVIRT=$PUSH_TREE_LIBVIRT TREE_RUMPUSERXEN=$PUSH_TREE_RUMPUSERXEN TREE_SEABIOS=$PUSH_TREE_SEABIOS +TREE_OVMF=$PUSH_TREE_OVMF if info_linux_tree "$branch"; then cd $repos/linux @@ -92,6 +93,10 @@ seabios) cd $repos/seabios git push $TREE_SEABIOS $revision:refs/heads/xen-tested-master ;; +ovmf) + cd $repos/ovmf + git push $TREE_OVMF $revision:refs/heads/xen-tested-master + ;; osstest) git push $HOME/testing.git $revision:incoming git push $XENBITS:/home/xen/git/osstest.git $revision:master diff --git a/cr-daily-branch b/cr-daily-branch index fc663ce..c7d1071 100755 --- a/cr-daily-branch +++ b/cr-daily-branch @@ -146,6 +146,16 @@ if [ "x$REVISION_SEABIOS" = x ]; then : REVISION_SEABIOS from Config.mk fi fi +if [ "x$REVISION_OVMF" = x ]; then +if [ "x$tree" = "xovmf" ]; then + TREE_OVMF=$TREE_OVMF_UPSTREAM + export TREE_OVMF + determine_version REVISION_OVMF ovmf OVMF + export REVISION_OVMF +else + : REVISION_OVMF from Config.mk +fi +fi if [ "x$REVISION_LIBVIRT" = x ]; then determine_version REVISION_LIBVIRT libvirt LIBVIRT export REVISION_LIBVIRT @@ -203,6 +213,10 @@ seabios) realtree=seabios NEW_REVISION=$REVISION_SEABIOS ;; +ovmf) + realtree=ovmf + NEW_REVISION=$REVISION_OVMF + ;; *)