[Xen-devel] [PATCH v3] x86: also allow REP STOS emulation acceleration

2015-01-12 Thread Jan Beulich
While the REP MOVS acceleration appears to have helped qemu-traditional
based guests, qemu-upstream (or really the respective video BIOSes)
doesn't appear to benefit from that. Instead the acceleration added
here provides a visible performance improvement during very early HVM
guest boot.

Signed-off-by: Jan Beulich 
---
v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos()
Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew.
v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by
Andrew. Add output operand telling the compiler that "buf" is being
written.

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard(
 return X86EMUL_OKAY;
 }
 
+static int hvmemul_rep_stos_discard(
+void *p_data,
+enum x86_segment seg,
+unsigned long offset,
+unsigned int bytes_per_rep,
+unsigned long *reps,
+struct x86_emulate_ctxt *ctxt)
+{
+return X86EMUL_OKAY;
+}
+
 static int hvmemul_rep_outs_discard(
 enum x86_segment src_seg,
 unsigned long src_offset,
@@ -982,6 +993,113 @@ static int hvmemul_rep_movs(
 return X86EMUL_OKAY;
 }
 
+static int hvmemul_rep_stos(
+void *p_data,
+enum x86_segment seg,
+unsigned long offset,
+unsigned int bytes_per_rep,
+unsigned long *reps,
+struct x86_emulate_ctxt *ctxt)
+{
+struct hvm_emulate_ctxt *hvmemul_ctxt =
+container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+unsigned long addr;
+paddr_t gpa;
+p2m_type_t p2mt;
+bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
+int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
+   hvm_access_write, hvmemul_ctxt, &addr);
+
+if ( rc == X86EMUL_OKAY )
+{
+uint32_t pfec = PFEC_page_present | PFEC_write_access;
+
+if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+pfec |= PFEC_user_mode;
+
+rc = hvmemul_linear_to_phys(
+addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
+}
+if ( rc != X86EMUL_OKAY )
+return rc;
+
+/* Check for MMIO op */
+(void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
+
+switch ( p2mt )
+{
+unsigned long bytes;
+void *buf;
+
+default:
+/* Allocate temporary buffer. */
+for ( ; ; )
+{
+bytes = *reps * bytes_per_rep;
+buf = xmalloc_bytes(bytes);
+if ( buf || *reps <= 1 )
+break;
+*reps >>= 1;
+}
+
+if ( !buf )
+buf = p_data;
+else
+switch ( bytes_per_rep )
+{
+unsigned long dummy;
+
+#define CASE(bits, suffix) \
+case (bits) / 8:   \
+asm ( "rep stos" #suffix   \
+  : "=m" (*(char (*)[bytes])buf),  \
+"=D" (dummy), "=c" (dummy) \
+  : "a" (*(const uint##bits##_t *)p_data), \
+ "1" (buf), "2" (*reps) ); \
+break
+CASE(8, b);
+CASE(16, w);
+CASE(32, l);
+CASE(64, q);
+#undef CASE
+
+default:
+ASSERT_UNREACHABLE();
+xfree(buf);
+return X86EMUL_UNHANDLEABLE;
+}
+
+/* Adjust address for reverse store. */
+if ( df )
+gpa -= bytes - bytes_per_rep;
+
+rc = hvm_copy_to_guest_phys(gpa, buf, bytes);
+
+if ( buf != p_data )
+xfree(buf);
+
+switch ( rc )
+{
+case HVMCOPY_gfn_paged_out:
+case HVMCOPY_gfn_shared:
+return X86EMUL_RETRY;
+case HVMCOPY_okay:
+return X86EMUL_OKAY;
+}
+
+gdprintk(XENLOG_WARNING,
+ "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu 
bytes_per_rep=%u\n",
+ gpa, *reps, bytes_per_rep);
+/* fall through */
+case p2m_mmio_direct:
+return X86EMUL_UNHANDLEABLE;
+
+case p2m_mmio_dm:
+return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df,
+   p_data);
+}
+}
+
 static int hvmemul_read_segment(
 enum x86_segment seg,
 struct segment_register *reg,
@@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_
 .rep_ins   = hvmemul_rep_ins,
 .rep_outs  = hvmemul_rep_outs,
 .rep_movs  = hvmemul_rep_movs,
+.rep_stos  = hvmemul_rep_stos,
 .read_segment  = hvmemul_read_segment,
 .write_segment = hvmemul_write_segment,
 .read_io   = hvmemul_read_io,
@@ -1264,6 +1383,7 @@ static const struct x86_emulate_ops hvm_
 .rep_ins   = hvmemul_rep_ins_discard,
 .rep_outs  = hvmemul_rep_outs_discard,
 .rep_mo

[Xen-devel] [PATCH v2] x86/HVM: make hvm_efer_valid() honor guest features

2015-01-12 Thread Jan Beulich
Following the earlier similar change validating CR4 modifications.

Signed-off-by: Jan Beulich 
---
v2: consider CR0.PG during restore when checking EFER.LMA

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma
 return 0;
 }
 
-static bool_t hvm_efer_valid(struct domain *d,
- uint64_t value, uint64_t efer_validbits)
+static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value,
+ signed int cr0_pg)
 {
-if ( nestedhvm_enabled(d) && cpu_has_svm )
-efer_validbits |= EFER_SVME;
+unsigned int ext1_ecx = 0, ext1_edx = 0;
 
-return !((value & ~efer_validbits) ||
- ((sizeof(long) != 8) && (value & EFER_LME)) ||
- (!cpu_has_svm && (value & EFER_SVME)) ||
- (!cpu_has_nx && (value & EFER_NX)) ||
- (!cpu_has_syscall && (value & EFER_SCE)) ||
- (!cpu_has_lmsl && (value & EFER_LMSLE)) ||
- (!cpu_has_ffxsr && (value & EFER_FFXSE)) ||
- ((value & (EFER_LME|EFER_LMA)) == EFER_LMA));
+if ( cr0_pg < 0 && !is_hardware_domain(v->domain) )
+{
+unsigned int level;
+
+ASSERT(v == current);
+hvm_cpuid(0x8000, &level, NULL, NULL, NULL);
+if ( level >= 0x8001 )
+hvm_cpuid(0x8001, NULL, NULL, &ext1_ecx, &ext1_edx);
+}
+else
+{
+ext1_edx = boot_cpu_data.x86_capability[X86_FEATURE_LM / 32];
+ext1_ecx = boot_cpu_data.x86_capability[X86_FEATURE_SVM / 32];
+}
+
+if ( (value & EFER_SCE) &&
+ !(ext1_edx & cpufeat_mask(X86_FEATURE_SYSCALL)) )
+return 0;
+
+if ( (value & (EFER_LME | EFER_LMA)) &&
+ !(ext1_edx & cpufeat_mask(X86_FEATURE_LM)) )
+return 0;
+
+if ( cr0_pg > 0 && (value & EFER_LMA) && (!(value & EFER_LME) || !cr0_pg) )
+return 0;
+
+if ( (value & EFER_NX) && !(ext1_edx & cpufeat_mask(X86_FEATURE_NX)) )
+return 0;
+
+if ( (value & EFER_SVME) &&
+ (!(ext1_ecx & cpufeat_mask(X86_FEATURE_SVM)) ||
+  !nestedhvm_enabled(v->domain)) )
+return 0;
+
+if ( (value & EFER_LMSLE) && !cpu_has_lmsl )
+return 0;
+
+if ( (value & EFER_FFXSE) &&
+ !(ext1_edx & cpufeat_mask(X86_FEATURE_FFXSR)) )
+return 0;
+
+return 1;
 }
 
 /* These reserved bits in lower 32 remain 0 after any load of CR0 */
@@ -1763,7 +1796,6 @@ static int hvm_load_cpu_ctxt(struct doma
 struct vcpu *v;
 struct hvm_hw_cpu ctxt;
 struct segment_register seg;
-uint64_t efer_validbits;
 
 /* Which vcpu is this? */
 vcpuid = hvm_load_instance(h);
@@ -1794,9 +1826,7 @@ static int hvm_load_cpu_ctxt(struct doma
 return -EINVAL;
 }
 
-efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_LMA
-   | EFER_NX | EFER_SCE;
-if ( !hvm_efer_valid(d, ctxt.msr_efer, efer_validbits) )
+if ( !hvm_efer_valid(v, ctxt.msr_efer, MASK_EXTR(ctxt.cr0, X86_CR0_PG)) )
 {
 printk(XENLOG_G_ERR "HVM%d restore: bad EFER %#" PRIx64 "\n",
d->domain_id, ctxt.msr_efer);
@@ -2936,12 +2966,10 @@ err:
 int hvm_set_efer(uint64_t value)
 {
 struct vcpu *v = current;
-uint64_t efer_validbits;
 
 value &= ~EFER_LMA;
 
-efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_NX | EFER_SCE;
-if ( !hvm_efer_valid(v->domain, value, efer_validbits) )
+if ( !hvm_efer_valid(v, value, -1) )
 {
 gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
  "EFER: %#"PRIx64"\n", value);



x86/HVM: make hvm_efer_valid() honor guest features

Following the earlier similar change validating CR4 modifications.

Signed-off-by: Jan Beulich 
---
v2: consider CR0.PG during restore when checking EFER.LMA

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma
 return 0;
 }
 
-static bool_t hvm_efer_valid(struct domain *d,
- uint64_t value, uint64_t efer_validbits)
+static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value,
+ signed int cr0_pg)
 {
-if ( nestedhvm_enabled(d) && cpu_has_svm )
-efer_validbits |= EFER_SVME;
+unsigned int ext1_ecx = 0, ext1_edx = 0;
 
-return !((value & ~efer_validbits) ||
- ((sizeof(long) != 8) && (value & EFER_LME)) ||
- (!cpu_has_svm && (value & EFER_SVME)) ||
- (!cpu_has_nx && (value & EFER_NX)) ||
- (!cpu_has_syscall && (value & EFER_SCE)) ||
- (!cpu_has_lmsl && (value & EFER_LMSLE)) ||
- (!cpu_has_ffxsr && (value & EFER_FFXSE)) ||
- ((value & (EFER_LME|EFER_LMA)) == EFER_LMA));
+if ( cr0_pg < 0 && !is_hardware_domain(v->domain) )
+{
+unsigned int level;
+
+ASSERT(v == current);
+hvm_cpuid(0x8000, &level, NULL, NULL, NULL);
+if (

[Xen-devel] [PATCH] x86: xen: mmu: Remove unused function

2015-01-12 Thread Rickard Strandqvist
Remove the function set_pte_mfn() that is not used anywhere.

This was partially found by using a static code analysis program called 
cppcheck.

Signed-off-by: Rickard Strandqvist 
---
 arch/x86/xen/mmu.c |9 -
 arch/x86/xen/mmu.h |2 --
 2 files changed, 11 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a8a1a3d..6959550 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -281,15 +281,6 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val)
xen_set_pmd_hyper(ptr, val);
 }
 
-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
-{
-   set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
-}
-
 static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
 {
struct mmu_update u;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb..f2dcf79 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -13,8 +13,6 @@ enum pt_level {
 
 bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
-void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, 
pte_t *ptep);
 void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
  pte_t *ptep, pte_t pte);
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] common/memory: fix an XSM error path

2015-01-12 Thread Jan Beulich
XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap
return the extent at which failure was detected, not error indicators.

Signed-off-by: Jan Beulich 

--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN
 return start_extent;
 args.domain = d;
 
-rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d);
-if ( rc )
+if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) )
 {
 rcu_unlock_domain(d);
-return rc;
+return start_extent;
 }
 
 switch ( op )



common/memory: fix an XSM error path

XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap
return the extent at which failure was detected, not error indicators.

Signed-off-by: Jan Beulich 

--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN
 return start_extent;
 args.domain = d;
 
-rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d);
-if ( rc )
+if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) )
 {
 rcu_unlock_domain(d);
-return rc;
+return start_extent;
 }
 
 switch ( op )
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation

2015-01-12 Thread Jan Beulich
While for us it's not as bad as it was for Linux, their commit
13e457e0ee ("KVM: x86: Emulator does not decode clflush well", by
Nadav Amit ) nevertheless points out two
shortcomings in our code: opcode 0F AE /7 is clflush only when it uses
a memory mode (otherwise it's SFENCE) and when there's no REP prefix
(an operand size prefix is fine, as that's CLFLUSHOPT).

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4400,7 +4400,9 @@ x86_emulate(
 case 0xae: /* Grp15 */
 switch ( modrm_reg & 7 )
 {
-case 7: /* clflush */
+case 7: /* clflush{,opt} */
+fail_if(modrm_mod == 3);
+fail_if(rep_prefix());
 fail_if(ops->wbinvd == NULL);
 if ( (rc = ops->wbinvd(ctxt)) != 0 )
 goto done;



x86emul: tighten CLFLUSH emulation

While for us it's not as bad as it was for Linux, their commit
13e457e0ee ("KVM: x86: Emulator does not decode clflush well", by
Nadav Amit ) nevertheless points out two
shortcomings in our code: opcode 0F AE /7 is clflush only when it uses
a memory mode (otherwise it's SFENCE) and when there's no REP prefix
(an operand size prefix is fine, as that's CLFLUSHOPT).

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4400,7 +4400,9 @@ x86_emulate(
 case 0xae: /* Grp15 */
 switch ( modrm_reg & 7 )
 {
-case 7: /* clflush */
+case 7: /* clflush{,opt} */
+fail_if(modrm_mod == 3);
+fail_if(rep_prefix());
 fail_if(ops->wbinvd == NULL);
 if ( (rc = ops->wbinvd(ctxt)) != 0 )
 goto done;
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86: xen: mmu: Remove unused function

2015-01-12 Thread Jürgen Groß

On 01/11/2015 11:35 PM, Rickard Strandqvist wrote:

Remove the function set_pte_mfn() that is not used anywhere.

This was partially found by using a static code analysis program called 
cppcheck.


Sorry, you seem not to have checked the newest kernel.

Used by:

xen_do_set_identity_and_remap_chunk()
xen_remap_memory()

So: Nak.


Juergen



Signed-off-by: Rickard Strandqvist 
---
  arch/x86/xen/mmu.c |9 -
  arch/x86/xen/mmu.h |2 --
  2 files changed, 11 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a8a1a3d..6959550 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -281,15 +281,6 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val)
xen_set_pmd_hyper(ptr, val);
  }

-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
-{
-   set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
-}
-
  static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
  {
struct mmu_update u;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb..f2dcf79 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -13,8 +13,6 @@ enum pt_level {

  bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);

-void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
  pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, 
pte_t *ptep);
  void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
  pte_t *ptep, pte_t pte);




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold

2015-01-12 Thread Jan Beulich
We've had reports of systems where CMCIs would surface at a relatively
high rate during certain periods of time, without them apparently
causing subsequent more severe problems (see Xeon E7-8800/4800/2800
specification clarification SC1). Give the admin a knob to lower the
impact on the system logs.

Signed-off-by: Jan Beulich 

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -242,6 +242,14 @@ the NMI watchdog is also enabled.
 
 If set, override Xen's default choice for the platform timer.
 
+### cmci-threshold
+> `= `
+
+> Default: `2`
+
+Specify the event count threshold for raising Corrected Machine Check
+Interrupts.  Specifying zero disables CMCI handling.
+
 ### cmos-rtc-probe
 > `= `
 
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -492,6 +492,9 @@ static int do_cmci_discover(int i)
 {
 unsigned msr = MSR_IA32_MCx_CTL2(i);
 u64 val;
+unsigned int threshold, max_threshold;
+static unsigned int cmci_threshold = 2;
+integer_param("cmci-threshold", cmci_threshold);
 
 rdmsrl(msr, val);
 /* Some other CPU already owns this bank. */
@@ -500,15 +503,28 @@ static int do_cmci_discover(int i)
 goto out;
 }
 
-val &= ~CMCI_THRESHOLD_MASK;
-wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
-rdmsrl(msr, val);
+if ( cmci_threshold )
+{
+wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
+rdmsrl(msr, val);
+}
 
 if (!(val & CMCI_EN)) {
 /* This bank does not support CMCI. Polling timer has to handle it. */
 mcabanks_set(i, __get_cpu_var(no_cmci_banks));
+wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK);
 return 0;
 }
+max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
+threshold = cmci_threshold;
+if ( threshold > max_threshold )
+{
+   mce_printk(MCE_QUIET,
+  "CMCI: threshold %#x too large for CPU%u bank %u, using 
%#x\n",
+  threshold, smp_processor_id(), i, max_threshold);
+   threshold = max_threshold;
+}
+wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
 mcabanks_set(i, __get_cpu_var(mce_banks_owned));
 out:
 mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -86,9 +86,6 @@
 /* Bitfield of MSR_K8_HWCR register */
 #define K8_HWCR_MCi_STATUS_WREN(1ULL << 18)
 
-/*Intel Specific bitfield*/
-#define CMCI_THRESHOLD 0x2
-
 #define MCi_MISC_ADDRMOD_MASK (0x7UL << 6)
 #define MCi_MISC_PHYSMOD(0x2UL << 6)
 



x86/MCE: allow overriding the CMCI threshold

We've had reports of systems where CMCIs would surface at a relatively
high rate during certain periods of time, without them apparently
causing subsequent more severe problems (see Xeon E7-8800/4800/2800
specification clarification SC1). Give the admin a knob to lower the
impact on the system logs.

Signed-off-by: Jan Beulich 

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -242,6 +242,14 @@ the NMI watchdog is also enabled.
 
 If set, override Xen's default choice for the platform timer.
 
+### cmci-threshold
+> `= `
+
+> Default: `2`
+
+Specify the event count threshold for raising Corrected Machine Check
+Interrupts.  Specifying zero disables CMCI handling.
+
 ### cmos-rtc-probe
 > `= `
 
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -492,6 +492,9 @@ static int do_cmci_discover(int i)
 {
 unsigned msr = MSR_IA32_MCx_CTL2(i);
 u64 val;
+unsigned int threshold, max_threshold;
+static unsigned int cmci_threshold = 2;
+integer_param("cmci-threshold", cmci_threshold);
 
 rdmsrl(msr, val);
 /* Some other CPU already owns this bank. */
@@ -500,15 +503,28 @@ static int do_cmci_discover(int i)
 goto out;
 }
 
-val &= ~CMCI_THRESHOLD_MASK;
-wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
-rdmsrl(msr, val);
+if ( cmci_threshold )
+{
+wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
+rdmsrl(msr, val);
+}
 
 if (!(val & CMCI_EN)) {
 /* This bank does not support CMCI. Polling timer has to handle it. */
 mcabanks_set(i, __get_cpu_var(no_cmci_banks));
+wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK);
 return 0;
 }
+max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
+threshold = cmci_threshold;
+if ( threshold > max_threshold )
+{
+   mce_printk(MCE_QUIET,
+  "CMCI: threshold %#x too large for CPU%u bank %u, using 
%#x\n",
+  threshold, smp_processor_id(), i, max_threshold);
+   threshold = max_threshold;
+}
+wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
 mcabanks_set(i, __get_cpu_var(mce_banks_owned));
 out:
 mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x

Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Friday, January 09, 2015 6:35 PM
> 
> >>> On 09.01.15 at 11:10,  wrote:
> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Boot time device assignment is different: The question isn't whether
> >> an assigned device works, instead the proper analogy is whether a
> >> device is _present_. If a device doesn't work on bare metal, it will
> >> still be discoverable. Yet if device assignment fails, that's not going
> >> to be the case - for security reasons, the guest would not see any
> >> notion of the device.
> >
> > the question is whether we want such device assignment fail due to
> > RMRR confliction, and the fail decision should be when Xen handles
> > actual assignment instead of when domain builder prepares reserved
> > regions.
> 
> Detecting the failure only in the hypervisor has the downside of
> potentially leaving the user with little clues as to what went wrong.
> Sending messages to the hypervisor log in that case is
> questionable, yet the tool stack (namely libxc) is known to not
> always do a good job in error propagation.
> 
> >> The question isn't about migrating with devices assigned, but about
> >> assigning devices after migration (consider a dual vif + SR-IOV NIC
> >> guest setup where the SR-IOV NIC gets hot-removed before
> >> migration and a new one hot-plugged afterwards).
> >>
> >> Furthermore any tying of the guest memory layout to the host's
> >> where the guest first boots is awkward, as post-migration there's
> >> not going to be any reliable correlation between the guest layout
> >> and the new host's.
> >
> > how can you solve this? like above example, a NIC on node-A leaves
> > a reserved region in guest e820. now it's hot-removed and then
> > migrated to node-b. there's no way to update e820 again since it's
> > only boot structure. then user will still see such awkward regions.
> > since it's not avoidable, report-all in the summary mail looks not
> > causing a new problem.
> 
> The solution to this are reserved regions specified in the guest config,
> independent of host characteristics.
> 

I don't think how reserved regions are specified matter here. My point
is that when a region is reserved in e820 at boot time, there's no way
to erase that knowledge in the guest even when devices causing that
reservation are hot removed later.

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] preparing for 4.4.2 / 4.3.4

2015-01-12 Thread Olaf Hering
On Thu, Jan 08, Jan Beulich wrote:

> now that 4.5 is almost out the door, I'd like to get stable releases
> prepared on the other two active branches. 4.3.4 is expected to be
> the last xen.org managed release on the 4.3 branch. Aiming at RC1s
> some time next week, please indicate commits you consider relevant
> to backport but find missing from the respective branches.

e86539a388314cd3dca88f5e69d7873343197cd8 ("libxc: check return values on mmap() 
and madvise() on xc_alloc_hypercall_buffer()")

Olaf

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [linux-3.14 test] 33341: tolerable FAIL - PUSHED

2015-01-12 Thread xen . org
flight 33341 linux-3.14 real [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/33341/

Failures :-/ but no regressions.

Regressions which are regarded as allowable (not blocking):
 test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail like 32448

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-armhf-armhf-libvirt  9 guest-start  fail   never pass
 test-amd64-i386-libvirt   9 guest-start  fail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-start fail never pass
 test-amd64-amd64-libvirt  9 guest-start  fail   never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass

version targeted for testing:
 linuxc3b70f0bbb6a883f4afa639286043d3f71fbbddf
baseline version:
 linux83a926f7a4e39fb6be0576024e67fe161593defa


People who touched revisions under test:
  "Eric W. Biederman" 
  Andreas Müller 
  Andrew Lunn 
  Andrew Morton 
  Andy Lutomirski 
  Baruch Siach 
  Catalin Marinas 
  Cedric Bosdonnat 
  Chris Mason 
  Christoph Hellwig 
  Dan Carpenter 
  Darrick J. Wong 
  Dmitry Eremin-Solenikov 
  Dmitry Osipenko 
  Eric W. Biederman 
  Filipe Manana 
  Greg Kroah-Hartman 
  H. Peter Anvin 
  Hannes Reinecke 
  Herbert Xu 
  Ingo Molnar 
  Jaehoon Chung 
  James Hogan 
  Jan Kara 
  Jason Cooper 
  Joe Thornber 
  Johannes Berg 
  Josef Bacik 
  Kashyap Desai 
  Lee Jones 
  Linus Torvalds 
  Luis Henriques 
  Michael Halcrow 
  Mike Snitzer 
  Mikulas Patocka 
  Milan Broz 
  Mimi Zohar 
  NeilBrown 
  Oleg Nesterov 
  Paolo Bonzini 
  Paul Moore 
  Peng Tao 
  Peter Guo 
  Rabin Vincent 
  Richard Guy Briggs 
  Richard Weinberger 
  Sumit Saxena 
  sumit.sax...@avagotech.com 
  Takashi Iwai 
  Thierry Reding 
  Thomas Gleixner 
  Trond Myklebust 
  Tyler Hicks 
  Ulf Hansson 
  Uwe Kleine-König 
  Will Deacon 
  Zhang Rui 


jobs:
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 build-amd64-rumpuserxen  pass
 build-i386-rumpuserxen   pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-pvh-amd  fail
 test-amd64-i386-rhel6hvm-amd pass
 test-amd64-i386-qemut-rhel6hvm-amd   pass
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64pass
 test-amd64-i386-xl-qemut-debianhvm-amd64 pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64

[Xen-devel] [PATCH] evtchn: simplify sending of notifications

2015-01-12 Thread Jan Beulich
The trivial wrapper evtchn_set_pending() is pretty pointless, as it
only serves to invoke another wrapper evtchn_port_set_pending(). In
turn, the latter is kind of inconsistent with its siblings in that is
takes a struct vcpu * rather than a struct domain * - adjusting this
allows for more efficient code in the majority of cases.

Signed-off-by: Jan Beulich 

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -95,8 +95,6 @@ static uint8_t get_xen_consumer(xen_even
 /* Get the notification function for a given Xen-bound event channel. */
 #define xen_notification_fn(e) (xen_consumers[(e)->xen_consumer-1])
 
-static void evtchn_set_pending(struct vcpu *v, int port);
-
 static int virq_is_global(uint32_t virq)
 {
 int rc;
@@ -287,7 +285,7 @@ static long evtchn_bind_interdomain(evtc
  * We may have lost notifications on the remote unbound port. Fix that up
  * here by conservatively always setting a notification on the local port.
  */
-evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
+evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn);
 
 bind->local_port = lport;
 
@@ -599,11 +597,10 @@ static long evtchn_close(evtchn_close_t 
 return __evtchn_close(current->domain, close->port);
 }
 
-int evtchn_send(struct domain *d, unsigned int lport)
+int evtchn_send(struct domain *ld, unsigned int lport)
 {
 struct evtchn *lchn, *rchn;
-struct domain *ld = d, *rd;
-struct vcpu   *rvcpu;
+struct domain *rd;
 intrport, ret = 0;
 
 spin_lock(&ld->event_lock);
@@ -633,14 +630,13 @@ int evtchn_send(struct domain *d, unsign
 rd= lchn->u.interdomain.remote_dom;
 rport = lchn->u.interdomain.remote_port;
 rchn  = evtchn_from_port(rd, rport);
-rvcpu = rd->vcpu[rchn->notify_vcpu_id];
 if ( consumer_is_xen(rchn) )
-(*xen_notification_fn(rchn))(rvcpu, rport);
+xen_notification_fn(rchn)(rd->vcpu[rchn->notify_vcpu_id], rport);
 else
-evtchn_set_pending(rvcpu, rport);
+evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn);
 break;
 case ECS_IPI:
-evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
+evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn);
 break;
 case ECS_UNBOUND:
 /* silently drop the notification */
@@ -655,11 +651,6 @@ out:
 return ret;
 }
 
-static void evtchn_set_pending(struct vcpu *v, int port)
-{
-evtchn_port_set_pending(v, evtchn_from_port(v->domain, port));
-}
-
 int guest_enabled_event(struct vcpu *v, uint32_t virq)
 {
 return ((v != NULL) && (v->virq_to_evtchn[virq] != 0));
@@ -669,6 +660,7 @@ void send_guest_vcpu_virq(struct vcpu *v
 {
 unsigned long flags;
 int port;
+struct domain *d;
 
 ASSERT(!virq_is_global(virq));
 
@@ -678,7 +670,8 @@ void send_guest_vcpu_virq(struct vcpu *v
 if ( unlikely(port == 0) )
 goto out;
 
-evtchn_set_pending(v, port);
+d = v->domain;
+evtchn_port_set_pending(d, v->vcpu_id, evtchn_from_port(d, port));
 
  out:
 spin_unlock_irqrestore(&v->virq_lock, flags);
@@ -707,7 +700,7 @@ static void send_guest_global_virq(struc
 goto out;
 
 chn = evtchn_from_port(d, port);
-evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port);
+evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
 
  out:
 spin_unlock_irqrestore(&v->virq_lock, flags);
@@ -731,7 +724,7 @@ void send_guest_pirq(struct domain *d, c
 }
 
 chn = evtchn_from_port(d, port);
-evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port);
+evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
 }
 
 static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly;
@@ -1202,7 +1195,6 @@ void notify_via_xen_event_channel(struct
 {
 struct evtchn *lchn, *rchn;
 struct domain *rd;
-intrport;
 
 spin_lock(&ld->event_lock);
 
@@ -1219,9 +1211,8 @@ void notify_via_xen_event_channel(struct
 if ( likely(lchn->state == ECS_INTERDOMAIN) )
 {
 rd= lchn->u.interdomain.remote_dom;
-rport = lchn->u.interdomain.remote_port;
-rchn  = evtchn_from_port(rd, rport);
-evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
+rchn  = evtchn_from_port(rd, lchn->u.interdomain.remote_port);
+evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn);
 }
 
 spin_unlock(&ld->event_lock);
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru
 d->evtchn_port_ops->init(d, evtchn);
 }
 
-static inline void evtchn_port_set_pending(struct vcpu *v,
+static inline void evtchn_port_set_pending(struct domain *d,
+   unsigned int vcpu_id,
struct evtchn *evtchn)
 {
-v->domain->evtchn_port_ops->set_pending(v, evtchn);
+d->evtchn_port_ops->set_pending(d->vcpu[vcpu_

[Xen-devel] [PATCH] arm64/EFI: minor corrections

2015-01-12 Thread Jan Beulich
- don't bail when using the last slot of bootinfo.mem.bank[] (due to
  premature incrementing of the array index)
- GUIDs should be static const (and placed into .init.* whenever
  possible)
- PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
  append one to the message passed to the function

Signed-off-by: Jan Beulich 

--- a/xen/arch/arm/efi/efi-boot.h
+++ b/xen/arch/arm/efi/efi-boot.h
@@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt,
 
 static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table)
 {
-const EFI_GUID fdt_guid = DEVICE_TREE_GUID;
+static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID;
 EFI_CONFIGURATION_TABLE *tables;
 void *fdt = NULL;
 int i;
@@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem
  || desc_ptr->Type == EfiBootServicesCode
  || desc_ptr->Type == EfiBootServicesData )
 {
-bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart;
-bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * 
EFI_PAGE_SIZE;
-if ( ++i >= NR_MEM_BANKS )
+if ( i >= NR_MEM_BANKS )
 {
-PrintStr(L"Warning: All ");
-DisplayUint(NR_MEM_BANKS, -1);
-PrintStr(L" bootinfo mem banks exhausted.\r\n");
+PrintStr(L"Warning: All " __stringify(NR_MEM_BANKS)
+  " bootinfo mem banks exhausted.\r\n");
 break;
 }
+bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart;
+bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * 
EFI_PAGE_SIZE;
+++i;
 }
 desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size);
 }
@@ -334,7 +334,7 @@ static void __init efi_arch_process_memo
 status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size,
 desc_ver);
 if ( EFI_ERROR(status) )
-PrintErrMesg(L"Updating FDT failed\r\n", status);
+PrintErrMesg(L"Updating FDT failed", status);
 }
 
 static void __init efi_arch_pre_exit_boot(void)
@@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli
 
 status = efi_bs->AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void 
**)&buf);
 if ( EFI_ERROR(status) )
-PrintErrMesg(L"Unable to allocate string buffer\r\n", status);
+PrintErrMesg(L"Unable to allocate string buffer", status);
 
 if ( image_name )
 {



arm64/EFI: minor corrections

- don't bail when using the last slot of bootinfo.mem.bank[] (due to
  premature incrementing of the array index)
- GUIDs should be static const (and placed into .init.* whenever
  possible)
- PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
  append one to the message passed to the function

Signed-off-by: Jan Beulich 

--- a/xen/arch/arm/efi/efi-boot.h
+++ b/xen/arch/arm/efi/efi-boot.h
@@ -104,7 +104,7 @@ static int __init fdt_set_reg(void *fdt,
 
 static void __init *lookup_fdt_config_table(EFI_SYSTEM_TABLE *sys_table)
 {
-const EFI_GUID fdt_guid = DEVICE_TREE_GUID;
+static const EFI_GUID __initconst fdt_guid = DEVICE_TREE_GUID;
 EFI_CONFIGURATION_TABLE *tables;
 void *fdt = NULL;
 int i;
@@ -135,15 +135,15 @@ static EFI_STATUS __init efi_process_mem
  || desc_ptr->Type == EfiBootServicesCode
  || desc_ptr->Type == EfiBootServicesData )
 {
-bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart;
-bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * 
EFI_PAGE_SIZE;
-if ( ++i >= NR_MEM_BANKS )
+if ( i >= NR_MEM_BANKS )
 {
-PrintStr(L"Warning: All ");
-DisplayUint(NR_MEM_BANKS, -1);
-PrintStr(L" bootinfo mem banks exhausted.\r\n");
+PrintStr(L"Warning: All " __stringify(NR_MEM_BANKS)
+  " bootinfo mem banks exhausted.\r\n");
 break;
 }
+bootinfo.mem.bank[i].start = desc_ptr->PhysicalStart;
+bootinfo.mem.bank[i].size = desc_ptr->NumberOfPages * 
EFI_PAGE_SIZE;
+++i;
 }
 desc_ptr = NextMemoryDescriptor(desc_ptr, desc_size);
 }
@@ -334,7 +334,7 @@ static void __init efi_arch_process_memo
 status = fdt_add_uefi_nodes(SystemTable, fdt, map, map_size, desc_size,
 desc_ver);
 if ( EFI_ERROR(status) )
-PrintErrMesg(L"Updating FDT failed\r\n", status);
+PrintErrMesg(L"Updating FDT failed", status);
 }
 
 static void __init efi_arch_pre_exit_boot(void)
@@ -408,7 +408,7 @@ static void __init efi_arch_handle_cmdli
 
 status = efi_bs->AllocatePool(EfiBootServicesData, EFI_PAGE_SIZE, (void 
**)&buf);
 if ( EFI_ERROR(status) )
-PrintErrMesg(L"Unable to allocate string buffer\r\n", status);
+PrintErrMesg(L"Unable to allocate string buffer", status);
 
 if ( image_name )
 {

[Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects

2015-01-12 Thread Jan Beulich
In particular the .rodata.str2 case is relevant for the EFI boot code
(moving around 3k from permanent to init-time sections).

Signed-off-by: Jan Beulich 

--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -170,7 +170,10 @@ _clean_%/: FORCE
 %.o: %.S Makefile
$(CC) $(AFLAGS) -c $< -o $@
 
-SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \
+SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \
+   $(foreach w,1 2 4, \
+   rodata.str$(w).$(a)) \
+   rodata.cst$(a)) \
 $(foreach r,rel rel.ro,data.$(r).local)
 
 $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile



extend list of sections convertible to .init.* ones in init-only objects

In particular the .rodata.str2 case is relevant for the EFI boot code
(moving around 3k from permanent to init-time sections).

Signed-off-by: Jan Beulich 

--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -170,7 +170,10 @@ _clean_%/: FORCE
 %.o: %.S Makefile
$(CC) $(AFLAGS) -c $< -o $@
 
-SPECIAL_DATA_SECTIONS := rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) \
+SPECIAL_DATA_SECTIONS := rodata $(foreach a,1 2 4 8 16, \
+   $(foreach w,1 2 4, \
+   rodata.str$(w).$(a)) \
+   rodata.cst$(a)) \
 $(foreach r,rel rel.ro,data.$(r).local)
 
 $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o Makefile
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] preparing for 4.4.2 / 4.3.4

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 09:47,  wrote:
> On Thu, Jan 08, Jan Beulich wrote:
> 
>> now that 4.5 is almost out the door, I'd like to get stable releases
>> prepared on the other two active branches. 4.3.4 is expected to be
>> the last xen.org managed release on the 4.3 branch. Aiming at RC1s
>> some time next week, please indicate commits you consider relevant
>> to backport but find missing from the respective branches.
> 
> e86539a388314cd3dca88f5e69d7873343197cd8 ("libxc: check return values on 
> mmap() and madvise() on xc_alloc_hypercall_buffer()")

Tool stack backport requests should be Cc-ed to the respective
maintainer (now added).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Fabio Fantoni

Il 09/01/2015 18:38, Dario Faggioli ha scritto:

On Fri, 2015-01-09 at 14:42 +, Wei Liu wrote:

On Thu, Jan 08, 2015 at 03:33:17PM +0100, Fabio Fantoni wrote:

Sorry for the probably stupid question, what are the pros and cons of
default use of phy instead qdisk for raw files as domU disk?


There's no stupid question. :-)

I was told that it performs better and enables other potential
improvements.


Not a big deal, probably, but IMO it would be nice to have at least a
few words about when it's happening in the changelog, wouldn't it?


I'm also interested in detailed changelog about.

In the meantime, I saw this:
http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
Based on the post above seems that phy will have important risk of data 
loss if I understand good, from Ian Campbell post:

xl also uses qdisk for raw disk images instead of loop+blkback which
xend used, because there are concerns that the loop driver can lead to
data loss (by not implementing direct i/o the underlying device, and/or
not handling flushes correct, my memory is a bit fuzzy).


Thanks for any reply and sorry for my bad english.




Regards,
Dario




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Friday, January 09, 2015 6:46 PM
> 
> >>> On 09.01.15 at 11:26,  wrote:
> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> >>> On 09.01.15 at 07:57,  wrote:
> >> > 1.1) per-device 'warn' vs. global 'warn'
> >> >
> >> > Both Tim/Jan prefer to 'warn' as a per-device option to the admin instead
> >> > of a global option.
> >> >
> >> > In a glimpse a per-device 'warn' option provides more fine-grained
> control
> >> > than a global option, however if thinking it carefully allowing one 
> >> > device
> >> > w/
> >> > potential problem isn't more correct or secure than allowing multiple
> >> > devices w/ potential problem. Even in practice a device like USB can
> >> > work bearing <1MB confliction, like Jan pointed out there's always corner
> >> > cases which we might not know so as long as we open door for one
> device,
> >> > it implies a problematic environment to users and user's judge on
> whether
> >> > he can live up to this problem is not impacted by how many devices the
> door
> >> > is opened for (he anyway needs to study warning message and do
> >> verification
> >> > if choosing to live up)
> >> >
> >> > Regarding to that, imo if we agree to provide 'warn' option, just 
> >> > providing
> >> > a global overriding option (definitely per-vm) is acceptable and simpler.
> >>
> >> If the admin determined that ignoring the RMRR requirements for one
> >> devices is safe, that doesn't (and shouldn't) mean this is the case for
> >> all other devices too.
> >
> > I don't think admin can determine whether it's 100% safe. What admin can
> > decide is whether he lives up to the potential problem based on his purpose
> > or based on some experiments. only device vendor knows when and how
> > RMRR is used. So as long as warn is opened for one device, I think it
> > already means a problem environment and then adding more device is
> > just same situation.
> 
> What if the admin consulted the device and BIOS vendors, and got
> assured there's not going to be any accesses to the reserved regions
> post-boot?

consultancy could be still inaccurate, or man-error may happen. 

> 
> >> > 1.2) when to 'fail'
> >> >
> >> > There is one open whether we should fail immediately in domain builder
> >> > if a confliction is detected.
> >> >
> >> > Jan's comment is yes, we should 'fail' the VM creation as it's an error.
> >> >
> >> > My previous point is more mimicking native behavior, where a device
> >> > failure (in our case it's actually potential device failure since VM is 
> >> > not
> >> > powered yet) doesn't impact user until its function is actually touched.
> >> > In our case, even domain builder fails to re-arrange guest RAM to skip
> >> > reserved regions, we have centralized policy (either 'fail' or 'warn' per
> >> > above conclusion) in Xen hypervisor when the device is actually assigned.
> >> > so a 'warn' should be fine, but my insist on this is not strong.
> >>
> >> See my earlier reply: Failure to add a device to me is more like a
> >> device preventing a bare metal system from coming up altogether.
> >
> > not all devices are required for bare metal to boot. it causes problem
> > only when it's being used in the boot process. say at powering up the
> > disk (insert in the PCI slot) is broken (not sure whether you call such
> > thing as 'failure to add a device'), it is only error when BIOS tries to
> > read disk.
> 
> Not necessarily. Any malfunctioning device touched by the BIOS,
> irrespective of whether the device is needed for booting, can cause
> the boot process to hang. Again, the analogy to bare metal is
> device presence, not whether the device is functioning properly.
> 
> > note device assignment path is the actual path to decide whether a
> > device will be present to the guest. not at this domain build time.
> 
> That would only make a marginal difference in time of when domain
> creation fails.

it's not marginal difference. instead it's about who owns the policy.

to me, detect/avoid conflictions in domain builder is just a preparation
for later device assignment (either deterministic static assignment or
non-deterministic hotplug). As a preparation, we don't need to make
a failure here as a blocker to prevent guest boot. Instead, leave the
decision to where device assignment actually happens then hard
requirement is made on any conflictions a.t.m. Then we just follow
the existing policy of device assignment (either block guest boot, or 
move forward w/o presenting the device), if confliction is treat as
a failure by default (w/o 'warn' override)

> 
> >> > and another point is about hotplug. 'fail' for future devices is too
> > strict,
> >> > but to differentiate that from static-assigned devices, domain builder
> >> > will then need maintain a per-device reserved region structure. just
> >> > 'warn' makes things simple.
> >>
> >> Whereas here I agree - hotplug should just fail (without otherwise
> >> impacting the guest).
> >
> > so 

Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:
> In the meantime, I saw this:
> http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
> Based on the post above seems that phy will have important risk of data 
> loss if I understand good, from Ian Campbell post:
> > xl also uses qdisk for raw disk images instead of loop+blkback which
> > xend used, because there are concerns that the loop driver can lead to
> > data loss (by not implementing direct i/o the underlying device, and/or
> > not handling flushes correct, my memory is a bit fuzzy).

Stefano already corrected me on this in this very thread.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 09:46,  wrote:
>>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> Sent: Friday, January 09, 2015 6:35 PM
>> >>> On 09.01.15 at 11:10,  wrote:
>> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> The question isn't about migrating with devices assigned, but about
>> >> assigning devices after migration (consider a dual vif + SR-IOV NIC
>> >> guest setup where the SR-IOV NIC gets hot-removed before
>> >> migration and a new one hot-plugged afterwards).
>> >>
>> >> Furthermore any tying of the guest memory layout to the host's
>> >> where the guest first boots is awkward, as post-migration there's
>> >> not going to be any reliable correlation between the guest layout
>> >> and the new host's.
>> >
>> > how can you solve this? like above example, a NIC on node-A leaves
>> > a reserved region in guest e820. now it's hot-removed and then
>> > migrated to node-b. there's no way to update e820 again since it's
>> > only boot structure. then user will still see such awkward regions.
>> > since it's not avoidable, report-all in the summary mail looks not
>> > causing a new problem.
>> 
>> The solution to this are reserved regions specified in the guest config,
>> independent of host characteristics.
> 
> I don't think how reserved regions are specified matter here. My point
> is that when a region is reserved in e820 at boot time, there's no way
> to erase that knowledge in the guest even when devices causing that
> reservation are hot removed later.

I don't think anyone ever indicated that such erasure would be
needed/wanted - I'm not sure how you ended up there.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Fabio Fantoni

Il 12/01/2015 10:31, Ian Campbell ha scritto:

On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:

In the meantime, I saw this:
http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
Based on the post above seems that phy will have important risk of data
loss if I understand good, from Ian Campbell post:

xl also uses qdisk for raw disk images instead of loop+blkback which
xend used, because there are concerns that the loop driver can lead to
data loss (by not implementing direct i/o the underlying device, and/or
not handling flushes correct, my memory is a bit fuzzy).

Stefano already corrected me on this in this very thread.

Ian.



Thanks for your reply.
I saw other posts about and if I understand good when O_DIRECT patches 
will be in upstream loop driver the data loss risk will be solved, right?


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Monday, January 12, 2015 5:33 PM
> 
> >>> On 12.01.15 at 09:46,  wrote:
> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: Friday, January 09, 2015 6:35 PM
> >> >>> On 09.01.15 at 11:10,  wrote:
> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> >> The question isn't about migrating with devices assigned, but about
> >> >> assigning devices after migration (consider a dual vif + SR-IOV NIC
> >> >> guest setup where the SR-IOV NIC gets hot-removed before
> >> >> migration and a new one hot-plugged afterwards).
> >> >>
> >> >> Furthermore any tying of the guest memory layout to the host's
> >> >> where the guest first boots is awkward, as post-migration there's
> >> >> not going to be any reliable correlation between the guest layout
> >> >> and the new host's.
> >> >
> >> > how can you solve this? like above example, a NIC on node-A leaves
> >> > a reserved region in guest e820. now it's hot-removed and then
> >> > migrated to node-b. there's no way to update e820 again since it's
> >> > only boot structure. then user will still see such awkward regions.
> >> > since it's not avoidable, report-all in the summary mail looks not
> >> > causing a new problem.
> >>
> >> The solution to this are reserved regions specified in the guest config,
> >> independent of host characteristics.
> >
> > I don't think how reserved regions are specified matter here. My point
> > is that when a region is reserved in e820 at boot time, there's no way
> > to erase that knowledge in the guest even when devices causing that
> > reservation are hot removed later.
> 
> I don't think anyone ever indicated that such erasure would be
> needed/wanted - I'm not sure how you ended up there.
> 

I ended here to indicate that report-all which gives user more reserved
regions than necessary is not a weird case since above scenario can also
create such fact. User shouldn't set expectation about reserved region
layout. and this argument is necessary to support our proposal of using
report-all. :-)

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Konrad Rzeszutek Wilk [mailto:konrad.w...@oracle.com]
> Sent: Saturday, January 10, 2015 4:28 AM
> 
> On Thu, Jan 08, 2015 at 06:02:04PM +, George Dunlap wrote:
> > On Thu, Jan 8, 2015 at 4:10 PM, Jan Beulich  wrote:
> >  On 08.01.15 at 16:59,  wrote:
> > >> On Thu, Jan 8, 2015 at 1:54 PM, Jan Beulich  wrote:
> >  the 1st invocation of this interface will save all reported reserved
> >  regions under domain structure, and later invocation (e.g. from
> >  hvmloader) gets saved content.
> > >>>
> > >>> Why would the reserved regions need attaching to the domain
> > >>> structure? The combination of (to be) assigned devices and
> > >>> global RMRR list always allow reproducing the intended set of
> > >>> regions without any extra storage.
> > >>
> > >> So when you say "(to be) assigned devices", you mean any device which
> > >> is currently assigned, *or may be assigned at some point in the
> > >> future*?
> > >
> > > Yes.
> > >
> > >> Do you think the extra storage for "this VM might possibly be assigned
> > >> this device at some point" wouldn't really be that much bigger than
> > >> "this VM might possibly map this RMRR at some point in the future"?
> > >
> > > Since listing devices without RMRR association would be pointless,
> > > I think a list of devices would require less storage. But see below.
> > >
> > >> It seems a lot cleaner to me to have the toolstack tell Xen what
> > >> ranges are reserved for RMRR per VM, and then have Xen check again
> > >> when assigning a device to make sure that the RMRRs have already been
> > >> reserved.
> > >
> > > With an extra level of what can be got wrong by the admin.
> > > However, I now realize that doing it this way would allow
> > > specifying regions not associated with any device on the host
> > > the guest boots on, but associated with one on a host the guest
> > > may later migrate to.
> >
> > I did say the toolstack, not the admin. :-)
> >
> > At the xl level, I envisioned a single boolean that would say, "Make
> > my memory layout resemble the host system" -- so the MMIO hole would
> > be the same size, and all the RMRRs would be reserved.
> 
> Like the e820_host=1 ? :-)
> 

so this is the extension to report-all, not just for reserved regions but for
all e820 entries. :-)

one thing I'm struggling here (w/ Jan in other threads) is whether reporting
all reserved regions on the host can be a default setting to simplify the 
overall rmrr implementations, given that fact that end user shouldn't set
expectation on actual reserved regions.

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu

2015-01-12 Thread Andrew Cooper
The resource behind an event channel is domain centric rather than vcpu
centric, and free_xen_event_channel() only follows the vcpu's domain pointer.

This change allows mem_event_disable() to avoid arbitrarily referencing
d->vcpu[0] just to pass the domain.

No functional change.

Signed-off-by: Andrew Cooper 
CC: Keir Fraser 
CC: Jan Beulich 
---
 xen/arch/x86/hvm/hvm.c |   12 ++--
 xen/common/event_channel.c |4 +---
 xen/common/mem_event.c |2 +-
 xen/include/xen/event.h|3 +--
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 8b06bfd..acfc032 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct 
hvm_ioreq_server *s,
 return 0;
 
  fail3:
-free_xen_event_channel(v, sv->ioreq_evtchn);
+free_xen_event_channel(v->domain, sv->ioreq_evtchn);
 
  fail2:
 spin_unlock(&s->lock);
@@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct 
hvm_ioreq_server *s,
 list_del(&sv->list_entry);
 
 if ( v->vcpu_id == 0 && s->bufioreq.va != NULL )
-free_xen_event_channel(v, s->bufioreq_evtchn);
+free_xen_event_channel(v->domain, s->bufioreq_evtchn);
 
-free_xen_event_channel(v, sv->ioreq_evtchn);
+free_xen_event_channel(v->domain, sv->ioreq_evtchn);
 
 xfree(sv);
 break;
@@ -701,9 +701,9 @@ static void hvm_ioreq_server_remove_all_vcpus(struct 
hvm_ioreq_server *s)
 list_del(&sv->list_entry);
 
 if ( v->vcpu_id == 0 && s->bufioreq.va != NULL )
-free_xen_event_channel(v, s->bufioreq_evtchn);
+free_xen_event_channel(v->domain, s->bufioreq_evtchn);
 
-free_xen_event_channel(v, sv->ioreq_evtchn);
+free_xen_event_channel(v->domain, sv->ioreq_evtchn);
 
 xfree(sv);
 }
@@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu *v, 
domid_t remote_domid,
 
 /* xchg() ensures that only we call free_xen_event_channel(). */
 old_port = xchg(p_port, new_port);
-free_xen_event_channel(v, old_port);
+free_xen_event_channel(v->domain, old_port);
 return 0;
 }
 
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 7d6de54..cfe4978 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel(
 }
 
 
-void free_xen_event_channel(
-struct vcpu *local_vcpu, int port)
+void free_xen_event_channel(struct domain *d, int port)
 {
 struct evtchn *chn;
-struct domain *d = local_vcpu->domain;
 
 spin_lock(&d->event_lock);
 
diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c
index 16ebdb5..0cc43d7 100644
--- a/xen/common/mem_event.c
+++ b/xen/common/mem_event.c
@@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d, struct 
mem_event_domain *med)
 }
 
 /* Free domU's event channel and leave the other one unbound */
-free_xen_event_channel(d->vcpu[0], med->xen_port);
+free_xen_event_channel(d, med->xen_port);
 
 /* Unblock all vCPUs */
 for_each_vcpu ( d, v )
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 88526f8..0eb1dd3 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)(
 int alloc_unbound_xen_event_channel(
 struct vcpu *local_vcpu, domid_t remote_domid,
 xen_event_channel_notification_t notification_fn);
-void free_xen_event_channel(
-struct vcpu *local_vcpu, int port);
+void free_xen_event_channel(struct domain *d, int port);
 
 /* Query if event channel is in use by the guest */
 int guest_enabled_event(struct vcpu *v, uint32_t virq);
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] xen-ringwatch issues

2015-01-12 Thread Ian Campbell
On Fri, 2015-01-09 at 17:00 -0500, moftah moftah wrote:
> Hi Ian,
> 
> 
> OK, I have followed your suggestions and created a new patch

Thanks, please submit it in the form describe in the wiki page
http://wiki.xen.org/wiki/Submitting_Xen_Patches which I referenced
before.

In particular, a Signed-off-by is a strict requirement for licensing
reasons, to indicate that you have read the DCO (which is at
http://wiki.xen.org/wiki/Submitting_Xen_Patches#Signing_off_a_patch ).

Please also provide a suitable changelog entry and use a unified diff
(diff -u).

I think rather than "[-]*" the right addition would be -? (i.e. a single
optional negative sign), unless Python's re syntax is lots different to
what I expect.

> this new patch does fix the issue fully for the negative numbers
>
> I didnt run the patch on 64bit OS since xenserver has 32bit Dom0

These fields are unsigned int, .i.e. always 32-bits. So I think your "if
self.conf < 0: self.cons = 4294967296 + self.cons" etc will work. The
Internet also suggests "ctypes.c_uint32(i).value", dunno if that is
better. In any case writing 4294967296 as 2**32 would be slightly
clearer.

Ian.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 10:41,  wrote:
>>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> Sent: Monday, January 12, 2015 5:33 PM
>> >>> On 12.01.15 at 09:46,  wrote:
>> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> Sent: Friday, January 09, 2015 6:35 PM
>> >> >>> On 09.01.15 at 11:10,  wrote:
>> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> >> The question isn't about migrating with devices assigned, but about
>> >> >> assigning devices after migration (consider a dual vif + SR-IOV NIC
>> >> >> guest setup where the SR-IOV NIC gets hot-removed before
>> >> >> migration and a new one hot-plugged afterwards).
>> >> >>
>> >> >> Furthermore any tying of the guest memory layout to the host's
>> >> >> where the guest first boots is awkward, as post-migration there's
>> >> >> not going to be any reliable correlation between the guest layout
>> >> >> and the new host's.
>> >> >
>> >> > how can you solve this? like above example, a NIC on node-A leaves
>> >> > a reserved region in guest e820. now it's hot-removed and then
>> >> > migrated to node-b. there's no way to update e820 again since it's
>> >> > only boot structure. then user will still see such awkward regions.
>> >> > since it's not avoidable, report-all in the summary mail looks not
>> >> > causing a new problem.
>> >>
>> >> The solution to this are reserved regions specified in the guest config,
>> >> independent of host characteristics.
>> >
>> > I don't think how reserved regions are specified matter here. My point
>> > is that when a region is reserved in e820 at boot time, there's no way
>> > to erase that knowledge in the guest even when devices causing that
>> > reservation are hot removed later.
>> 
>> I don't think anyone ever indicated that such erasure would be
>> needed/wanted - I'm not sure how you ended up there.
>> 
> 
> I ended here to indicate that report-all which gives user more reserved
> regions than necessary is not a weird case since above scenario can also
> create such fact. User shouldn't set expectation about reserved region
> layout. and this argument is necessary to support our proposal of using
> report-all. :-)

The fact that ranges can't be removed from a guest's memory map
is irrelevant - there's simply no question that this is that way. The
main counter argument against report-all remains: It may result in
unnecessarily little low memory in guests not needing all of the host
regions to be reserved for them.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Monday, January 12, 2015 5:51 PM
> 
> >>> On 12.01.15 at 10:41,  wrote:
> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: Monday, January 12, 2015 5:33 PM
> >> >>> On 12.01.15 at 09:46,  wrote:
> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> >> Sent: Friday, January 09, 2015 6:35 PM
> >> >> >>> On 09.01.15 at 11:10,  wrote:
> >> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> >> >> The question isn't about migrating with devices assigned, but about
> >> >> >> assigning devices after migration (consider a dual vif + SR-IOV NIC
> >> >> >> guest setup where the SR-IOV NIC gets hot-removed before
> >> >> >> migration and a new one hot-plugged afterwards).
> >> >> >>
> >> >> >> Furthermore any tying of the guest memory layout to the host's
> >> >> >> where the guest first boots is awkward, as post-migration there's
> >> >> >> not going to be any reliable correlation between the guest layout
> >> >> >> and the new host's.
> >> >> >
> >> >> > how can you solve this? like above example, a NIC on node-A leaves
> >> >> > a reserved region in guest e820. now it's hot-removed and then
> >> >> > migrated to node-b. there's no way to update e820 again since it's
> >> >> > only boot structure. then user will still see such awkward regions.
> >> >> > since it's not avoidable, report-all in the summary mail looks not
> >> >> > causing a new problem.
> >> >>
> >> >> The solution to this are reserved regions specified in the guest config,
> >> >> independent of host characteristics.
> >> >
> >> > I don't think how reserved regions are specified matter here. My point
> >> > is that when a region is reserved in e820 at boot time, there's no way
> >> > to erase that knowledge in the guest even when devices causing that
> >> > reservation are hot removed later.
> >>
> >> I don't think anyone ever indicated that such erasure would be
> >> needed/wanted - I'm not sure how you ended up there.
> >>
> >
> > I ended here to indicate that report-all which gives user more reserved
> > regions than necessary is not a weird case since above scenario can also
> > create such fact. User shouldn't set expectation about reserved region
> > layout. and this argument is necessary to support our proposal of using
> > report-all. :-)
> 
> The fact that ranges can't be removed from a guest's memory map
> is irrelevant - there's simply no question that this is that way. The
> main counter argument against report-all remains: It may result in
> unnecessarily little low memory in guests not needing all of the host
> regions to be reserved for them.
> 

the result is related to another open whether we want to block guest
boot for such problem. If 'warn' in domain builder is acceptable, we
don't need to change lowmem for such rare 1GB case, just throws
a warning for unnecessary conflictions (doesn't hurt if user doesn't
assign it). 

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m

2015-01-12 Thread Jan Beulich
>>> On 10.01.15 at 00:04,  wrote:
> On 01/09/2015 02:41 PM, Andrew Cooper wrote:
>> Having some non-OS part of the guest swap the EPT tables and
>> accidentally turn a DMA buffer read-only is not going to end well.
>> 
> 
> The agent can certainly do bad things, and at some level you have to assume it
> is sensible enough not to. However, I'm not sure this is fundamentally more
> dangerous than what a privileged domain can do today using the MEMOP...
> operations, and people are already using those for very similar purposes.

I don't follow - how is what privileged domain can do related to the
proposed changes here (which are - via VMFUNC - at least partially
guest controllable, and that's also the case Andrew mentioned in his
reply)? I'm having a hard time understanding how a P2M stripped of
anything that's not plain RAM can be very useful to a guest. IOW
without such fundamental aspects clarified I don't see a point in
looking at the individual patches (which btw, according to your
wording elsewhere, should have been marked RFC).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu

2015-01-12 Thread Paul Durrant
> -Original Message-
> From: xen-devel-boun...@lists.xen.org [mailto:xen-devel-
> boun...@lists.xen.org] On Behalf Of Andrew Cooper
> Sent: 12 January 2015 09:47
> To: Xen-devel
> Cc: Andrew Cooper; Keir (Xen.org); Jan Beulich
> Subject: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel()
> to take a domain rather than vcpu
> 
> The resource behind an event channel is domain centric rather than vcpu
> centric, and free_xen_event_channel() only follows the vcpu's domain
> pointer.
> 
> This change allows mem_event_disable() to avoid arbitrarily referencing
> d->vcpu[0] just to pass the domain.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper 
> CC: Keir Fraser 
> CC: Jan Beulich 

Reviewed-by: Paul Durrant 

> ---
>  xen/arch/x86/hvm/hvm.c |   12 ++--
>  xen/common/event_channel.c |4 +---
>  xen/common/mem_event.c |2 +-
>  xen/include/xen/event.h|3 +--
>  4 files changed, 9 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 8b06bfd..acfc032 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -647,7 +647,7 @@ static int hvm_ioreq_server_add_vcpu(struct
> hvm_ioreq_server *s,
>  return 0;
> 
>   fail3:
> -free_xen_event_channel(v, sv->ioreq_evtchn);
> +free_xen_event_channel(v->domain, sv->ioreq_evtchn);
> 
>   fail2:
>  spin_unlock(&s->lock);
> @@ -674,9 +674,9 @@ static void hvm_ioreq_server_remove_vcpu(struct
> hvm_ioreq_server *s,
>  list_del(&sv->list_entry);
> 
>  if ( v->vcpu_id == 0 && s->bufioreq.va != NULL )
> -free_xen_event_channel(v, s->bufioreq_evtchn);
> +free_xen_event_channel(v->domain, s->bufioreq_evtchn);
> 
> -free_xen_event_channel(v, sv->ioreq_evtchn);
> +free_xen_event_channel(v->domain, sv->ioreq_evtchn);
> 
>  xfree(sv);
>  break;
> @@ -701,9 +701,9 @@ static void
> hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s)
>  list_del(&sv->list_entry);
> 
>  if ( v->vcpu_id == 0 && s->bufioreq.va != NULL )
> -free_xen_event_channel(v, s->bufioreq_evtchn);
> +free_xen_event_channel(v->domain, s->bufioreq_evtchn);
> 
> -free_xen_event_channel(v, sv->ioreq_evtchn);
> +free_xen_event_channel(v->domain, sv->ioreq_evtchn);
> 
>  xfree(sv);
>  }
> @@ -1333,7 +1333,7 @@ static int hvm_replace_event_channel(struct vcpu
> *v, domid_t remote_domid,
> 
>  /* xchg() ensures that only we call free_xen_event_channel(). */
>  old_port = xchg(p_port, new_port);
> -free_xen_event_channel(v, old_port);
> +free_xen_event_channel(v->domain, old_port);
>  return 0;
>  }
> 
> diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
> index 7d6de54..cfe4978 100644
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -1173,11 +1173,9 @@ int alloc_unbound_xen_event_channel(
>  }
> 
> 
> -void free_xen_event_channel(
> -struct vcpu *local_vcpu, int port)
> +void free_xen_event_channel(struct domain *d, int port)
>  {
>  struct evtchn *chn;
> -struct domain *d = local_vcpu->domain;
> 
>  spin_lock(&d->event_lock);
> 
> diff --git a/xen/common/mem_event.c b/xen/common/mem_event.c
> index 16ebdb5..0cc43d7 100644
> --- a/xen/common/mem_event.c
> +++ b/xen/common/mem_event.c
> @@ -221,7 +221,7 @@ static int mem_event_disable(struct domain *d,
> struct mem_event_domain *med)
>  }
> 
>  /* Free domU's event channel and leave the other one unbound */
> -free_xen_event_channel(d->vcpu[0], med->xen_port);
> +free_xen_event_channel(d, med->xen_port);
> 
>  /* Unblock all vCPUs */
>  for_each_vcpu ( d, v )
> diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
> index 88526f8..0eb1dd3 100644
> --- a/xen/include/xen/event.h
> +++ b/xen/include/xen/event.h
> @@ -60,8 +60,7 @@ typedef void (*xen_event_channel_notification_t)(
>  int alloc_unbound_xen_event_channel(
>  struct vcpu *local_vcpu, domid_t remote_domid,
>  xen_event_channel_notification_t notification_fn);
> -void free_xen_event_channel(
> -struct vcpu *local_vcpu, int port);
> +void free_xen_event_channel(struct domain *d, int port);
> 
>  /* Query if event channel is in use by the guest */
>  int guest_enabled_event(struct vcpu *v, uint32_t virq);
> --
> 1.7.10.4
> 
> 
> ___
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote:
> Il 12/01/2015 10:31, Ian Campbell ha scritto:
> > On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:
> >> In the meantime, I saw this:
> >> http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
> >> Based on the post above seems that phy will have important risk of data
> >> loss if I understand good, from Ian Campbell post:
> >>> xl also uses qdisk for raw disk images instead of loop+blkback which
> >>> xend used, because there are concerns that the loop driver can lead to
> >>> data loss (by not implementing direct i/o the underlying device, and/or
> >>> not handling flushes correct, my memory is a bit fuzzy).
> > Stefano already corrected me on this in this very thread.
> >
> > Ian.
> >
> 
> Thanks for your reply.
> I saw other posts about and if I understand good when O_DIRECT patches 
> will be in upstream loop driver the data loss risk will be solved, right?

Stefano says that O_DIRECT is not needed, only correct barrier semantics
are and he believes those are correctly implemented.

O_DIRECT would be an additional layer of safety perhaps, but sounds to
be not strictly needed.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 10:46,  wrote:
> The resource behind an event channel is domain centric rather than vcpu
> centric, and free_xen_event_channel() only follows the vcpu's domain 
> pointer.

I wonder whether for symmetry alloc_unbound_xen_event_channel()
shouldn't then also take a [struct domain *, unsigned int vcpu_id]
pair instead of a struct vcpu *.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 10:56,  wrote:
> the result is related to another open whether we want to block guest
> boot for such problem. If 'warn' in domain builder is acceptable, we
> don't need to change lowmem for such rare 1GB case, just throws
> a warning for unnecessary conflictions (doesn't hurt if user doesn't
> assign it). 

And how would you then deal with the one guest needing that
range reserved?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/evtchn: Alter free_xen_event_channel() to take a domain rather than vcpu

2015-01-12 Thread Andrew Cooper
On 12/01/15 10:07, Jan Beulich wrote:
 On 12.01.15 at 10:46,  wrote:
>> The resource behind an event channel is domain centric rather than vcpu
>> centric, and free_xen_event_channel() only follows the vcpu's domain 
>> pointer.
> I wonder whether for symmetry alloc_unbound_xen_event_channel()
> shouldn't then also take a [struct domain *, unsigned int vcpu_id]
> pair instead of a struct vcpu *.

Sounds like a good idea - I will do that for v2.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel

2015-01-12 Thread Andrew Cooper
This is RFC because explicitly changes the logic introduced by c/s b34f2c375
"xsm: label xen-consumer event channels", and is only compile tested.

Xen event channels are not internal resources.  They still have one end in a
domain, and are created at the request of privileged domains.  This logic
which "successfully" creates a Xen event channel opens up undesirable failure
cases with ill-specified XSM policies.

If a domain is permitted to create ioreq servers or memevent listeners, but
not to create event channels, the ioreq/memevent creation will succeed but
attempting to bind the returned event channel will fail without any indication
of a permission error.

Signed-off-by: Andrew Cooper 
CC: Daniel De Graaf 
CC: Keir Fraser 
CC: Jan Beulich 
CC: Tim Deegan 
CC: Ian Campbell 
CC: Ian Jackson 
---
 xen/common/event_channel.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index cfe4978..89a7d99 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel(
 chn = evtchn_from_port(d, port);
 
 rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid);
+if ( rc )
+goto out;
 
 chn->state = ECS_UNBOUND;
 chn->xen_consumer = get_xen_consumer(notification_fn);
 chn->notify_vcpu_id = local_vcpu->vcpu_id;
-chn->u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID;
+chn->u.unbound.remote_domid = remote_domid;
 
  out:
 spin_unlock(&d->event_lock);
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Monday, January 12, 2015 6:09 PM
> 
> >>> On 12.01.15 at 10:56,  wrote:
> > the result is related to another open whether we want to block guest
> > boot for such problem. If 'warn' in domain builder is acceptable, we
> > don't need to change lowmem for such rare 1GB case, just throws
> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
> > assign it).
> 
> And how would you then deal with the one guest needing that
> range reserved?
> 

if guest needs the range, then report-all or report-sel doesn't matter.
domain builder throws the warning, and later device assignment will
fail (or warn w/ override). In reality I think 1GB is rare. Making such
assumption to simplify implementation is reasonable.

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Fabio Fantoni

Il 12/01/2015 11:06, Ian Campbell ha scritto:

On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote:

Il 12/01/2015 10:31, Ian Campbell ha scritto:

On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:

In the meantime, I saw this:
http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
Based on the post above seems that phy will have important risk of data
loss if I understand good, from Ian Campbell post:

xl also uses qdisk for raw disk images instead of loop+blkback which
xend used, because there are concerns that the loop driver can lead to
data loss (by not implementing direct i/o the underlying device, and/or
not handling flushes correct, my memory is a bit fuzzy).

Stefano already corrected me on this in this very thread.

Ian.


Thanks for your reply.
I saw other posts about and if I understand good when O_DIRECT patches
will be in upstream loop driver the data loss risk will be solved, right?

Stefano says that O_DIRECT is not needed, only correct barrier semantics
are and he believes those are correctly implemented.


I not understand if manual changes and/or settings are needed, about 
this I mean:

only correct barrier semantics are and he believes those are correctly 
implemented

If yes what exactly?



O_DIRECT would be an additional layer of safety perhaps, but sounds to
be not strictly needed.

Ian.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH resend] libxl, hotplug/Linux: default to phy backend for raw format file, take 2

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 11:17 +0100, Fabio Fantoni wrote:
> Il 12/01/2015 11:06, Ian Campbell ha scritto:
> > On Mon, 2015-01-12 at 10:39 +0100, Fabio Fantoni wrote:
> >> Il 12/01/2015 10:31, Ian Campbell ha scritto:
> >>> On Mon, 2015-01-12 at 10:15 +0100, Fabio Fantoni wrote:
>  In the meantime, I saw this:
>  http://lists.xen.org/archives/html/xen-users/2015-01/msg00047.html
>  Based on the post above seems that phy will have important risk of data
>  loss if I understand good, from Ian Campbell post:
> > xl also uses qdisk for raw disk images instead of loop+blkback which
> > xend used, because there are concerns that the loop driver can lead to
> > data loss (by not implementing direct i/o the underlying device, and/or
> > not handling flushes correct, my memory is a bit fuzzy).
> >>> Stefano already corrected me on this in this very thread.
> >>>
> >>> Ian.
> >>>
> >> Thanks for your reply.
> >> I saw other posts about and if I understand good when O_DIRECT patches
> >> will be in upstream loop driver the data loss risk will be solved, right?
> > Stefano says that O_DIRECT is not needed, only correct barrier semantics
> > are and he believes those are correctly implemented.
> 
> I not understand if manual changes and/or settings are needed, about 
> this I mean:
> > only correct barrier semantics are and he believes those are correctly 
> > implemented
> If yes what exactly?

If Stefano is correct then it should all just work, no manual steps
needed (if manual steps were needed we wouldn't be taking this patch).

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote:
> This is RFC because explicitly changes the logic introduced by c/s b34f2c375
> "xsm: label xen-consumer event channels", and is only compile tested.
> 
> Xen event channels are not internal resources.  They still have one end in a
> domain, and are created at the request of privileged domains.  This logic
> which "successfully" creates a Xen event channel opens up undesirable failure
> cases with ill-specified XSM policies.
> 
> If a domain is permitted to create ioreq servers or memevent listeners, but
> not to create event channels, the ioreq/memevent creation will succeed but
> attempting to bind the returned event channel will fail without any indication
> of a permission error.
> 
> Signed-off-by: Andrew Cooper 
> CC: Daniel De Graaf 
> CC: Keir Fraser 
> CC: Jan Beulich 
> CC: Tim Deegan 
> CC: Ian Campbell 
> CC: Ian Jackson 
> ---
>  xen/common/event_channel.c |4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
> index cfe4978..89a7d99 100644
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel(
>  chn = evtchn_from_port(d, port);
>  
>  rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid);
> +if ( rc )
> +goto out;

out here appears to return port, not rc so you aren't returning failure,
but an even more half setup port than before.

And I think you need to free the port on failure too.

>  
>  chn->state = ECS_UNBOUND;
>  chn->xen_consumer = get_xen_consumer(notification_fn);
>  chn->notify_vcpu_id = local_vcpu->vcpu_id;
> -chn->u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID;
> +chn->u.unbound.remote_domid = remote_domid;
>  
>   out:
>  spin_unlock(&d->event_lock);



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 11:12,  wrote:
>>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> Sent: Monday, January 12, 2015 6:09 PM
>> 
>> >>> On 12.01.15 at 10:56,  wrote:
>> > the result is related to another open whether we want to block guest
>> > boot for such problem. If 'warn' in domain builder is acceptable, we
>> > don't need to change lowmem for such rare 1GB case, just throws
>> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
>> > assign it).
>> 
>> And how would you then deal with the one guest needing that
>> range reserved?
> 
> if guest needs the range, then report-all or report-sel doesn't matter.
> domain builder throws the warning, and later device assignment will
> fail (or warn w/ override). In reality I think 1GB is rare. Making such
> assumption to simplify implementation is reasonable.

One of my main problems with all you recent argumentation here
is the arbitrary use of the 1Gb boundary - there's nothing special
in this discussion with where the boundary is. Everything revolves
around the (undue) effect of report-all on domains not needing all
of the ranges found on the host.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86/MCE: allow overriding the CMCI threshold

2015-01-12 Thread Egger, Christoph
On 2015/01/12 9:44, Jan Beulich wrote:
> We've had reports of systems where CMCIs would surface at a relatively
> high rate during certain periods of time, without them apparently
> causing subsequent more severe problems (see Xeon E7-8800/4800/2800
> specification clarification SC1). Give the admin a knob to lower the
> impact on the system logs.
> 
> Signed-off-by: Jan Beulich 

A small comment at the bottom, besides of that:

Acked-by: Christoph Egger 

> 
> --- a/docs/misc/xen-command-line.markdown
> +++ b/docs/misc/xen-command-line.markdown
> @@ -242,6 +242,14 @@ the NMI watchdog is also enabled.
>  
>  If set, override Xen's default choice for the platform timer.
>  
> +### cmci-threshold
> +> `= `
> +
> +> Default: `2`
> +
> +Specify the event count threshold for raising Corrected Machine Check
> +Interrupts.  Specifying zero disables CMCI handling.
> +
>  ### cmos-rtc-probe
>  > `= `
>  
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c
> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
> @@ -492,6 +492,9 @@ static int do_cmci_discover(int i)
>  {
>  unsigned msr = MSR_IA32_MCx_CTL2(i);
>  u64 val;
> +unsigned int threshold, max_threshold;
> +static unsigned int cmci_threshold = 2;
> +integer_param("cmci-threshold", cmci_threshold);
>  
>  rdmsrl(msr, val);
>  /* Some other CPU already owns this bank. */
> @@ -500,15 +503,28 @@ static int do_cmci_discover(int i)
>  goto out;
>  }
>  
> -val &= ~CMCI_THRESHOLD_MASK;
> -wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
> -rdmsrl(msr, val);
> +if ( cmci_threshold )
> +{
> +wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
> +rdmsrl(msr, val);
> +}
>  
>  if (!(val & CMCI_EN)) {
>  /* This bank does not support CMCI. Polling timer has to handle it. 
> */
>  mcabanks_set(i, __get_cpu_var(no_cmci_banks));
> +wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK);
>  return 0;
>  }
> +max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
> +threshold = cmci_threshold;
> +if ( threshold > max_threshold )
> +{
> +   mce_printk(MCE_QUIET,
> +  "CMCI: threshold %#x too large for CPU%u bank %u, using 
> %#x\n",
> +  threshold, smp_processor_id(), i, max_threshold);
> +   threshold = max_threshold;
> +}
> +wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
>  mcabanks_set(i, __get_cpu_var(mce_banks_owned));
>  out:
>  mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h
> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
> @@ -86,9 +86,6 @@
>  /* Bitfield of MSR_K8_HWCR register */
>  #define K8_HWCR_MCi_STATUS_WREN  (1ULL << 18)
>  
> -/*Intel Specific bitfield*/
> -#define CMCI_THRESHOLD   0x2
> -
>  #define MCi_MISC_ADDRMOD_MASK (0x7UL << 6)
>  #define MCi_MISC_PHYSMOD(0x2UL << 6)

I think these two are also Intel specific bitfields.
Please leave the comment for those.

Christoph


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] RFC XSM/evtchn: Never pretend to have successfully created a Xen event channel

2015-01-12 Thread Andrew Cooper
On 12/01/15 10:22, Ian Campbell wrote:
> On Mon, 2015-01-12 at 10:03 +, Andrew Cooper wrote:
>> This is RFC because explicitly changes the logic introduced by c/s b34f2c375
>> "xsm: label xen-consumer event channels", and is only compile tested.
>>
>> Xen event channels are not internal resources.  They still have one end in a
>> domain, and are created at the request of privileged domains.  This logic
>> which "successfully" creates a Xen event channel opens up undesirable failure
>> cases with ill-specified XSM policies.
>>
>> If a domain is permitted to create ioreq servers or memevent listeners, but
>> not to create event channels, the ioreq/memevent creation will succeed but
>> attempting to bind the returned event channel will fail without any 
>> indication
>> of a permission error.
>>
>> Signed-off-by: Andrew Cooper 
>> CC: Daniel De Graaf 
>> CC: Keir Fraser 
>> CC: Jan Beulich 
>> CC: Tim Deegan 
>> CC: Ian Campbell 
>> CC: Ian Jackson 
>> ---
>>  xen/common/event_channel.c |4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
>> index cfe4978..89a7d99 100644
>> --- a/xen/common/event_channel.c
>> +++ b/xen/common/event_channel.c
>> @@ -1160,11 +1160,13 @@ int alloc_unbound_xen_event_channel(
>>  chn = evtchn_from_port(d, port);
>>  
>>  rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid);
>> +if ( rc )
>> +goto out;
> out here appears to return port, not rc so you aren't returning failure,
> but an even more half setup port than before.

Ah yes - so I am.  rc was intended.  All callers do correctly check for
< 0 to indicate failure.

>
> And I think you need to free the port on failure too.

At the point that we bail, chn->state is still ECS_FREE so there is
nothing to deallocate.

~Andrew

>
>>  
>>  chn->state = ECS_UNBOUND;
>>  chn->xen_consumer = get_xen_consumer(notification_fn);
>>  chn->notify_vcpu_id = local_vcpu->vcpu_id;
>> -chn->u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID;
>> +chn->u.unbound.remote_domid = remote_domid;
>>  
>>   out:
>>  spin_unlock(&d->event_lock);
>



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2] x86/HVM: make hvm_efer_valid() honor guest features

2015-01-12 Thread Andrew Cooper
On 12/01/15 08:00, Jan Beulich wrote:
> Following the earlier similar change validating CR4 modifications.
>
> Signed-off-by: Jan Beulich 

Acked-by: Andrew Cooper 

> ---
> v2: consider CR0.PG during restore when checking EFER.LMA
>
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1672,20 +1672,53 @@ static int hvm_save_cpu_ctxt(struct doma
>  return 0;
>  }
>  
> -static bool_t hvm_efer_valid(struct domain *d,
> - uint64_t value, uint64_t efer_validbits)
> +static bool_t hvm_efer_valid(const struct vcpu *v, uint64_t value,
> + signed int cr0_pg)
>  {
> -if ( nestedhvm_enabled(d) && cpu_has_svm )
> -efer_validbits |= EFER_SVME;
> +unsigned int ext1_ecx = 0, ext1_edx = 0;
>  
> -return !((value & ~efer_validbits) ||
> - ((sizeof(long) != 8) && (value & EFER_LME)) ||
> - (!cpu_has_svm && (value & EFER_SVME)) ||
> - (!cpu_has_nx && (value & EFER_NX)) ||
> - (!cpu_has_syscall && (value & EFER_SCE)) ||
> - (!cpu_has_lmsl && (value & EFER_LMSLE)) ||
> - (!cpu_has_ffxsr && (value & EFER_FFXSE)) ||
> - ((value & (EFER_LME|EFER_LMA)) == EFER_LMA));
> +if ( cr0_pg < 0 && !is_hardware_domain(v->domain) )
> +{
> +unsigned int level;
> +
> +ASSERT(v == current);
> +hvm_cpuid(0x8000, &level, NULL, NULL, NULL);
> +if ( level >= 0x8001 )
> +hvm_cpuid(0x8001, NULL, NULL, &ext1_ecx, &ext1_edx);
> +}
> +else
> +{
> +ext1_edx = boot_cpu_data.x86_capability[X86_FEATURE_LM / 32];
> +ext1_ecx = boot_cpu_data.x86_capability[X86_FEATURE_SVM / 32];
> +}
> +
> +if ( (value & EFER_SCE) &&
> + !(ext1_edx & cpufeat_mask(X86_FEATURE_SYSCALL)) )
> +return 0;
> +
> +if ( (value & (EFER_LME | EFER_LMA)) &&
> + !(ext1_edx & cpufeat_mask(X86_FEATURE_LM)) )
> +return 0;
> +
> +if ( cr0_pg > 0 && (value & EFER_LMA) && (!(value & EFER_LME) || 
> !cr0_pg) )
> +return 0;
> +
> +if ( (value & EFER_NX) && !(ext1_edx & cpufeat_mask(X86_FEATURE_NX)) )
> +return 0;
> +
> +if ( (value & EFER_SVME) &&
> + (!(ext1_ecx & cpufeat_mask(X86_FEATURE_SVM)) ||
> +  !nestedhvm_enabled(v->domain)) )
> +return 0;
> +
> +if ( (value & EFER_LMSLE) && !cpu_has_lmsl )
> +return 0;
> +
> +if ( (value & EFER_FFXSE) &&
> + !(ext1_edx & cpufeat_mask(X86_FEATURE_FFXSR)) )
> +return 0;
> +
> +return 1;
>  }
>  
>  /* These reserved bits in lower 32 remain 0 after any load of CR0 */
> @@ -1763,7 +1796,6 @@ static int hvm_load_cpu_ctxt(struct doma
>  struct vcpu *v;
>  struct hvm_hw_cpu ctxt;
>  struct segment_register seg;
> -uint64_t efer_validbits;
>  
>  /* Which vcpu is this? */
>  vcpuid = hvm_load_instance(h);
> @@ -1794,9 +1826,7 @@ static int hvm_load_cpu_ctxt(struct doma
>  return -EINVAL;
>  }
>  
> -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_LMA
> -   | EFER_NX | EFER_SCE;
> -if ( !hvm_efer_valid(d, ctxt.msr_efer, efer_validbits) )
> +if ( !hvm_efer_valid(v, ctxt.msr_efer, MASK_EXTR(ctxt.cr0, X86_CR0_PG)) )
>  {
>  printk(XENLOG_G_ERR "HVM%d restore: bad EFER %#" PRIx64 "\n",
> d->domain_id, ctxt.msr_efer);
> @@ -2936,12 +2966,10 @@ err:
>  int hvm_set_efer(uint64_t value)
>  {
>  struct vcpu *v = current;
> -uint64_t efer_validbits;
>  
>  value &= ~EFER_LMA;
>  
> -efer_validbits = EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_NX | EFER_SCE;
> -if ( !hvm_efer_valid(v->domain, value, efer_validbits) )
> +if ( !hvm_efer_valid(v, value, -1) )
>  {
>  gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
>   "EFER: %#"PRIx64"\n", value);
>
>
>


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3] x86: also allow REP STOS emulation acceleration

2015-01-12 Thread Andrew Cooper
On 12/01/15 08:01, Jan Beulich wrote:
> While the REP MOVS acceleration appears to have helped qemu-traditional
> based guests, qemu-upstream (or really the respective video BIOSes)
> doesn't appear to benefit from that. Instead the acceleration added
> here provides a visible performance improvement during very early HVM
> guest boot.
>
> Signed-off-by: Jan Beulich 

Acked-by: Andrew Cooper 

> ---
> v3: Drop now pointless memory clobber from asm() in hvmemul_rep_stos()
> Introduce and use ASSERT_UNREACHABLE(), as suggested by Andrew.
> v2: Fix asm() constraints in hvmemul_rep_stos(), as pointed out by
> Andrew. Add output operand telling the compiler that "buf" is being
> written.
>
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -731,6 +731,17 @@ static int hvmemul_rep_movs_discard(
>  return X86EMUL_OKAY;
>  }
>  
> +static int hvmemul_rep_stos_discard(
> +void *p_data,
> +enum x86_segment seg,
> +unsigned long offset,
> +unsigned int bytes_per_rep,
> +unsigned long *reps,
> +struct x86_emulate_ctxt *ctxt)
> +{
> +return X86EMUL_OKAY;
> +}
> +
>  static int hvmemul_rep_outs_discard(
>  enum x86_segment src_seg,
>  unsigned long src_offset,
> @@ -982,6 +993,113 @@ static int hvmemul_rep_movs(
>  return X86EMUL_OKAY;
>  }
>  
> +static int hvmemul_rep_stos(
> +void *p_data,
> +enum x86_segment seg,
> +unsigned long offset,
> +unsigned int bytes_per_rep,
> +unsigned long *reps,
> +struct x86_emulate_ctxt *ctxt)
> +{
> +struct hvm_emulate_ctxt *hvmemul_ctxt =
> +container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> +unsigned long addr;
> +paddr_t gpa;
> +p2m_type_t p2mt;
> +bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
> +int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
> +   hvm_access_write, hvmemul_ctxt, 
> &addr);
> +
> +if ( rc == X86EMUL_OKAY )
> +{
> +uint32_t pfec = PFEC_page_present | PFEC_write_access;
> +
> +if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
> +pfec |= PFEC_user_mode;
> +
> +rc = hvmemul_linear_to_phys(
> +addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
> +}
> +if ( rc != X86EMUL_OKAY )
> +return rc;
> +
> +/* Check for MMIO op */
> +(void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
> +
> +switch ( p2mt )
> +{
> +unsigned long bytes;
> +void *buf;
> +
> +default:
> +/* Allocate temporary buffer. */
> +for ( ; ; )
> +{
> +bytes = *reps * bytes_per_rep;
> +buf = xmalloc_bytes(bytes);
> +if ( buf || *reps <= 1 )
> +break;
> +*reps >>= 1;
> +}
> +
> +if ( !buf )
> +buf = p_data;
> +else
> +switch ( bytes_per_rep )
> +{
> +unsigned long dummy;
> +
> +#define CASE(bits, suffix) \
> +case (bits) / 8:   \
> +asm ( "rep stos" #suffix   \
> +  : "=m" (*(char (*)[bytes])buf),  \
> +"=D" (dummy), "=c" (dummy) \
> +  : "a" (*(const uint##bits##_t *)p_data), \
> + "1" (buf), "2" (*reps) ); \
> +break
> +CASE(8, b);
> +CASE(16, w);
> +CASE(32, l);
> +CASE(64, q);
> +#undef CASE
> +
> +default:
> +ASSERT_UNREACHABLE();
> +xfree(buf);
> +return X86EMUL_UNHANDLEABLE;
> +}
> +
> +/* Adjust address for reverse store. */
> +if ( df )
> +gpa -= bytes - bytes_per_rep;
> +
> +rc = hvm_copy_to_guest_phys(gpa, buf, bytes);
> +
> +if ( buf != p_data )
> +xfree(buf);
> +
> +switch ( rc )
> +{
> +case HVMCOPY_gfn_paged_out:
> +case HVMCOPY_gfn_shared:
> +return X86EMUL_RETRY;
> +case HVMCOPY_okay:
> +return X86EMUL_OKAY;
> +}
> +
> +gdprintk(XENLOG_WARNING,
> + "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu 
> bytes_per_rep=%u\n",
> + gpa, *reps, bytes_per_rep);
> +/* fall through */
> +case p2m_mmio_direct:
> +return X86EMUL_UNHANDLEABLE;
> +
> +case p2m_mmio_dm:
> +return hvmemul_do_mmio(gpa, reps, bytes_per_rep, 0, IOREQ_WRITE, df,
> +   p_data);
> +}
> +}
> +
>  static int hvmemul_read_segment(
>  enum x86_segment seg,
>  struct segment_register *reg,
> @@ -1239,6 +1357,7 @@ static const struct x86_emulate_ops hvm_
>  .rep_ins   = hvmemul_rep_ins,
>  .rep_outs  = hvmemul_rep_outs,
>  .

Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications

2015-01-12 Thread David Vrabel
On 12/01/15 08:57, Jan Beulich wrote:
> The trivial wrapper evtchn_set_pending() is pretty pointless, as it
> only serves to invoke another wrapper evtchn_port_set_pending(). In
> turn, the latter is kind of inconsistent with its siblings in that is
> takes a struct vcpu * rather than a struct domain * - adjusting this
> allows for more efficient code in the majority of cases.

Reviewed-by: David Vrabel 

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86emul: tighten CLFLUSH emulation

2015-01-12 Thread Andrew Cooper
On 12/01/15 08:23, Jan Beulich wrote:
> While for us it's not as bad as it was for Linux, their commit
> 13e457e0ee ("KVM: x86: Emulator does not decode clflush well", by
> Nadav Amit ) nevertheless points out two
> shortcomings in our code: opcode 0F AE /7 is clflush only when it uses
> a memory mode (otherwise it's SFENCE) and when there's no REP prefix
> (an operand size prefix is fine, as that's CLFLUSHOPT).
>
> Signed-off-by: Jan Beulich 

Acked-by: Andrew Cooper 

>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -4400,7 +4400,9 @@ x86_emulate(
>  case 0xae: /* Grp15 */
>  switch ( modrm_reg & 7 )
>  {
> -case 7: /* clflush */
> +case 7: /* clflush{,opt} */
> +fail_if(modrm_mod == 3);
> +fail_if(rep_prefix());
>  fail_if(ops->wbinvd == NULL);
>  if ( (rc = ops->wbinvd(ctxt)) != 0 )
>  goto done;
>
>
>
>
>
> ___
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] common/memory: fix an XSM error path

2015-01-12 Thread Andrew Cooper
On 12/01/15 08:21, Jan Beulich wrote:
> XENMEM_{in,de}crease_reservation as well as XENMEM_populate_physmap
> return the extent at which failure was detected, not error indicators.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

>
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -747,11 +747,10 @@ long do_memory_op(unsigned long cmd, XEN
>  return start_extent;
>  args.domain = d;
>  
> -rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d);
> -if ( rc )
> +if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) )
>  {
>  rcu_unlock_domain(d);
> -return rc;
> +return start_extent;
>  }
>  
>  switch ( op )
>
>
>
>
>
> ___
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [Patch V2 0/4] xen: correct several bugs in new p2m list setup

2015-01-12 Thread David Vrabel
On 12/01/15 05:05, Juergen Gross wrote:
> In the setup code of the linear mapped p2m list several bugs have
> been found, especially for 32 bit dom0. These patches correct the
> errors and make 32 bit dom0 bootable again.

Applied to stable/for-linus-3.19, thanks.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH net] drivers: net: xen-netfront: remove residual dead code

2015-01-12 Thread David Vrabel
On 10/01/15 09:20, Vincenzo Maffione wrote:
> This patch removes some unused arrays from the netfront private
> data structures. These arrays were used in "flip" receive mode.

Reviewed-by: David Vrabel 

Thanks.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] One question about the hypercall to translate gfn to mfn.

2015-01-12 Thread David Vrabel
On 09/01/15 08:02, Tian, Kevin wrote:
>> From: Tim Deegan [mailto:t...@xen.org]
>> Sent: Thursday, January 08, 2015 8:43 PM
>>
>> Hi,
>>
 Not really.  The IOMMU tables are also 64-bit so there must be enough
 addresses to map all of RAM.  There shouldn't be any need for these
 mappings to be _contiguous_, btw.  You just need to have one free
 address for each mapping.  Again, following how grant maps work, I'd
 imagine that PVH guests will allocate an unused GFN for each mapping
 and do enough bookkeeping to make sure they don't clash with other GFN
 users (grant mapping, ballooning, &c).  PV guests will probably be
 given a BFN by the hypervisor at map time (which will be == MFN in
 practice) and just needs to pass the same BFN to the unmap call later
 (it can store it in the GTT meanwhile).
>>>
>>> if possible prefer to make both consistent, i.e. always finding unused GFN?
>>
>> I don't think it will be possible.  PV domains are already using BFNs
>> supplied by Xen (in fact == MFN) for backend grant mappings, which
>> would conflict with supplying their own for these mappings.  But
>> again, I think the kernel maintainers for Xen may have a better idea
>> of how these interfaces are used inside the kernel.  For example,
>> it might be easy enough to wrap the two systems inside a common API
>> inside linux.   Again, following how grant mapping works seems like
>> the way forward.
>>
> 
> So Konrad, do you have any insight here? :-)

Malcolm took two pages of this notebook explaining to me how he thought
it should work (in combination with his PV IOMMU work), so I'll let him
explain.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH for 2.3 v2 1/1] xen-hvm: increase maxmem before calling xc_domain_populate_physmap

2015-01-12 Thread Stefano Stabellini
On Wed, 3 Dec 2014, Don Slutz wrote:
> From: Stefano Stabellini 
> 
> Increase maxmem before calling xc_domain_populate_physmap_exact to
> avoid the risk of running out of guest memory. This way we can also
> avoid complex memory calculations in libxl at domain construction
> time.
> 
> This patch fixes an abort() when assigning more than 4 NICs to a VM.
> 
> Signed-off-by: Stefano Stabellini 
> Signed-off-by: Don Slutz 
> ---
> v2: Changes by Don Slutz
>   Switch from xc_domain_getinfo to xc_domain_getinfolist
>   Fix error check for xc_domain_getinfolist
>   Limit increase of maxmem to only do when needed:
> Add QEMU_SPARE_PAGES (How many pages to leave free)
> Add free_pages calculation
> 
>  xen-hvm.c | 19 +++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/xen-hvm.c b/xen-hvm.c
> index 7548794..d30e77e 100644
> --- a/xen-hvm.c
> +++ b/xen-hvm.c
> @@ -90,6 +90,7 @@ static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t 
> *shared_page, int vcpu)
>  #endif
>  
>  #define BUFFER_IO_MAX_DELAY  100
> +#define QEMU_SPARE_PAGES 16

We need a big comment here to explain why we have this parameter and
when we'll be able to get rid of it.

Other than that the patch is fine.

Thanks!


>  typedef struct XenPhysmap {
>  hwaddr start_addr;
> @@ -244,6 +245,8 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, 
> MemoryRegion *mr)
>  unsigned long nr_pfn;
>  xen_pfn_t *pfn_list;
>  int i;
> +xc_domaininfo_t info;
> +unsigned long free_pages;
>  
>  if (runstate_check(RUN_STATE_INMIGRATE)) {
>  /* RAM already populated in Xen */
> @@ -266,6 +269,22 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, 
> MemoryRegion *mr)
>  pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
>  }
>  
> +if ((xc_domain_getinfolist(xen_xc, xen_domid, 1, &info) != 1) ||
> +(info.domain != xen_domid)) {
> +hw_error("xc_domain_getinfolist failed");
> +}
> +free_pages = info.max_pages - info.tot_pages;
> +if (free_pages > QEMU_SPARE_PAGES) {
> +free_pages -= QEMU_SPARE_PAGES;
> +} else {
> +free_pages = 0;
> +}
> +if ((free_pages < nr_pfn) &&
> +(xc_domain_setmaxmem(xen_xc, xen_domid,
> + ((info.max_pages + nr_pfn - free_pages)
> +  << (XC_PAGE_SHIFT - 10))) < 0)) {
> +hw_error("xc_domain_setmaxmem failed");
> +}
>  if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, 
> pfn_list)) {
>  hw_error("xen: failed to populate ram at " RAM_ADDR_FMT, ram_addr);
>  }
> -- 
> 1.8.4
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Monday, January 12, 2015 6:23 PM
> 
> >>> On 12.01.15 at 11:12,  wrote:
> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: Monday, January 12, 2015 6:09 PM
> >>
> >> >>> On 12.01.15 at 10:56,  wrote:
> >> > the result is related to another open whether we want to block guest
> >> > boot for such problem. If 'warn' in domain builder is acceptable, we
> >> > don't need to change lowmem for such rare 1GB case, just throws
> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
> >> > assign it).
> >>
> >> And how would you then deal with the one guest needing that
> >> range reserved?
> >
> > if guest needs the range, then report-all or report-sel doesn't matter.
> > domain builder throws the warning, and later device assignment will
> > fail (or warn w/ override). In reality I think 1GB is rare. Making such
> > assumption to simplify implementation is reasonable.
> 
> One of my main problems with all you recent argumentation here
> is the arbitrary use of the 1Gb boundary - there's nothing special
> in this discussion with where the boundary is. Everything revolves
> around the (undue) effect of report-all on domains not needing all
> of the ranges found on the host.
> 

I'm not sure which part of my argument is not clear here. report-all
would be a problem here only if we want to fix all the conflictions
(then pulling unnecessary devices increases the confliction possibility) 
in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
while warn other conflictions (e.g. <3G) in domain builder (let later 
assignment path to actually fail if confliction does matter), then we 
don't need to solve all conflictions in domain builder (if say 1G example
fixing it may instead reduce lowmem greatly) and then report-all 
may just add more warnings than report-sel for unused devices.

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread George Dunlap
On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin  wrote:
>> From: George Dunlap
>> Sent: Thursday, January 08, 2015 8:55 PM
>>
>> On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap
>>  wrote:
>> > If RMRRs almost always happen up above 2G, for example, then a simple
>> > solution that wouldn't require too much work would be to make sure
>> > that the PCI MMIO hole we specify to libxc and to qemu-upstream is big
>> > enough to include all RMRRs.  That would satisfy the libxc and qemu
>> > requirements.
>> >
>> > If we then store specific RMRRs we want included in xenstore,
>> > hvmloader can put them in the e820 map, and that would satisfy the
>> > hvmloader requirement.
>>
>> An alternate thing to do here would be to "properly" fix the
>> qemu-upstream problem, by making a way for hvmloader to communicate
>> changes in the gpfn layout to qemu.
>>
>> Then hvmloader could do the work of moving memory under RMRRs to
>> higher memory; and libxc wouldn't need to be involved at all.
>>
>> I think it would also fix our long-standing issues with assigning PCI
>> devices to qemu-upstream guests, which up until now have only been
>> worked around.
>>
>
> could you elaborate a bit for that long-standing issue?

So qemu-traditional didn't particularly expect to know the guest
memory layout.  qemu-upstream does; it expects to know what areas of
memory are guest memory and what areas of memory are unmapped.  If a
read or write happens to a gpfn which *xen* knows is valid, but which
*qemu-upstream* thinks is unmapped, then qemu-upstream will crash.

The problem though is that the guest's memory map is not actually
communicated to qemu-upstream in any way.  Originally, qemu-upstream
was only told how much memory the guest had, and it just "happens" to
choose the same guest memory layout as the libxc domain builder does.
This works, but it is bad design, because if libxc were to change for
some reason, people would have to simply remember to also change the
qemu-upstream layout.

Where this really bites us is in PCI pass-through.  The default <4G
MMIO hole is very small; and hvmloader naturally expects to be able to
make this area larger by relocating memory from below 4G to above 4G.
It moves the memory in Xen's p2m, but it has no way of communicating
this to qemu-upstream.  So when the guest does an MMIO instuction that
causes qemu-upstream to access that memory, the guest crashes.

There are two work-arounds at the moment:
1. A flag which tells hvmloader not to relocate memory
2. The option to tell qemu-upstream to make the memory hole larger.

Both are just work-arounds though; a "proper fix" would be to allow
hvmloader some way of telling qemu that the memory has moved, so it
can update its memory map.

This will (I'm pretty sure) have an effect on RMRR regions as well,
for the reasons I've mentioned above: whether make the "holes" for the
RMRRs in libxc or in hvmloader, if we *move* that memory up to the top
of the address space (rather than, say, just not giving that RAM to
the guest), then qemu-upstream's idea of the guest memory map will be
wrong, and will probably crash at some point.

Having the ability for hvmloader to populate and/or move the memory
around, and then tell qemu-upstream what the resulting map looked
like, would fix both the MMIO-resize issue and the RMRR problem, wrt
qemu-upstream.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH RFC] xen-time: decreasing the rating of the xen clocksource below that of the tsc clocksource for dom0's

2015-01-12 Thread David Vrabel
On 08/01/15 15:06, Imre Palik wrote:
> On 01/07/15 17:30, Ian Campbell wrote:
>> On Wed, 2015-01-07 at 17:16 +0100, Imre Palik wrote:
>>> From: "Palik, Imre" 
>>>
>>> In Dom0's the use of the TSC clocksource (whenever it is stable enough to
>>> be used) instead of the Xen clocksource should not cause any issues, as
>>> Dom0 VMs never live-migrated.
>>
>> Is this still true given that dom0's vcpus are migrated amongst pcpus on
>> the host? The tsc are not synchronised on some generations of hardware
>> so the result there would be the TSC appearing to do very odd things
>> under dom0's feet. Does Linux cope with that or does it not matter for
>> some other reason?
> 
> First of all, if the initial pcpus are not synchronised, linux won't consider
> TSC as a viable clocksource.
> 
> If the initial pcpus are synchronised, but then the dom0 vcpus are migrated to
> unsynchronised pcpus, then hopefully the tsc watchdog catches the issue, and
> the kernel falls back to the xen clocksource.  The issue here is that the
> watchdog can only detect clock skews bigger than 0.0625s/0.5s.  Hopefully this
> is enough to avoid the weirdest things.

I don't think any such hardware exists.  Either TSC is synchronized
across all CPUs or none.

> Also, some parts of the kernel (e.g., scheduling) will always use the paravirt
> clock.  No matter what priorities are set.
> 
> So it should be safe for some definition of safe.
> But I was unable to test it on a hardware platform that would hit the 
> problematic
> case described above.

Ok.  Can you list the various time sources and their ratings in the
commit message for clarity.  i.e, to justify 275 (below TSC = 300. above
hpet = 250).

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications

2015-01-12 Thread Andrew Cooper
On 12/01/15 08:57, Jan Beulich wrote:
> --- a/xen/include/xen/event.h
> +++ b/xen/include/xen/event.h
> @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru
>  d->evtchn_port_ops->init(d, evtchn);
>  }
>  
> -static inline void evtchn_port_set_pending(struct vcpu *v,
> +static inline void evtchn_port_set_pending(struct domain *d,
> +   unsigned int vcpu_id,
> struct evtchn *evtchn)

I would rename this to the, now vacant, evtchn_set_pending().  It takes
an evtchn* not a port.  (Its sole caller was evtchn_set_pending(), so
the patch won't grow)

Furthermore, all callers except send_guest_vcpu_virq() currently use
evtchn->notify_vcpu_id to get a struct vcpu* to pass.  I think you can
drop the vcpu_id parameter and use evtchn->notify_vcpu_id directly,
which reduces the likelyhood of a bug where the evtchn is bound to one
vcpu but a caller gets the wrong id and raises the event channel on the
wrong vcpu.

~Andrew

>  {
> -v->domain->evtchn_port_ops->set_pending(v, evtchn);
> +d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn);
>  }
>  
>  static inline void evtchn_port_clear_pending(struct domain *d,
>
>
>
>
> ___
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

2015-01-12 Thread Stefano Stabellini
On Wed, 24 Dec 2014, Liang Li wrote:
> Use the 'xl pci-attach $DomU $BDF' command to attach more then
> one PCI devices to the guest, then detach the devices with
> 'xl pci-detach $DomU $BDF', after that, re-attach these PCI
> devices again, an error message will be reported like following:
> 
> libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive
> an error message from QMP server: Duplicate ID 'pci-pt-03_10.1'
> for device.
> 
> The count of calling xen_pt_region_add and xen_pt_region_del are
> not the same will cause the XenPCIPassthroughState and it's related
> QemuOpts object not be released properly.

Thanks for the patch!

>From this description, I don't quite understand why the
memory_region_ref and memory_region_unref calls are wrong.  What do you
mean by "The count of calling xen_pt_region_add and xen_pt_region_del
are not the same"?

On unplug xen_pt_region_del does not get called?
Or the memory region argument is not exactly the same as the one
initially passed to xen_pt_region_add?


> Signed-off-by: Liang Li 
> Reported-by: Longtao Pang 
> ---
>  hw/xen/xen_pt.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
> index c1bf357..523b8a2 100644
> --- a/hw/xen/xen_pt.c
> +++ b/hw/xen/xen_pt.c
> @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l, 
> MemoryRegionSection *sec)
>  XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
>   memory_listener);
>  
> -memory_region_ref(sec->mr);
>  xen_pt_region_update(s, sec, true);
>  }
>  
> @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l, 
> MemoryRegionSection *sec)
>   memory_listener);
>  
>  xen_pt_region_update(s, sec, false);
> -memory_region_unref(sec->mr);
>  }
>  
>  static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
> @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener *l, 
> MemoryRegionSection *sec)
>  XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
>   io_listener);
>  
> -memory_region_ref(sec->mr);
>  xen_pt_region_update(s, sec, true);
>  }
>  
> @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l, 
> MemoryRegionSection *sec)
>   io_listener);
>  
>  xen_pt_region_update(s, sec, false);
> -memory_region_unref(sec->mr);
>  }
>  
>  static const MemoryListener xen_pt_memory_listener = {
> -- 
> 1.9.1
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c

2015-01-12 Thread Roger Pau Monné
El 12/01/15 a les 8.09, Bob Liu ha escrit:
> 
> On 01/09/2015 11:51 PM, Roger Pau Monné wrote:
>> El 06/01/15 a les 14.19, Bob Liu ha escrit:
>>> When there is no enough free grants, gnttab_alloc_grant_references()
>>> will fail and block request queue will stop.
>>> If the system is always lack of grants, blkif_restart_queue_callback() 
>>> can't be
>>> scheduled and block request queue can't be restart(block I/O hang).
>>>
>>> But when there are former requests complete, some grants may free to
>>> persistent_gnts_c, we can give the request queue another chance to restart 
>>> and
>>> avoid block hang.
>>>
>>> Reported-by: Junxiao Bi 
>>> Signed-off-by: Bob Liu 
>>> ---
>>>  drivers/block/xen-blkfront.c |   11 +++
>>>  1 file changed, 11 insertions(+)
>>>
>>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>>> index 2236c6f..dd30f99 100644
>>> --- a/drivers/block/xen-blkfront.c
>>> +++ b/drivers/block/xen-blkfront.c
>>> @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, 
>>> struct blkfront_info *info,
>>> }
>>> }
>>> }
>>> +
>>> +   /*
>>> +* Request queue would be stopped if failed to alloc enough grants and
>>> +* won't be restarted until gnttab_free_count >= info->callback->count.
>>> +*
>>> +* But there is another case, once we have enough persistent grants we
>>> +* can try to restart the request queue instead of continue to wait for
>>> +* 'gnttab_free_count'.
>>> +*/
>>> +   if (info->persistent_gnts_c >= info->callback.count)
>>> +   schedule_work(&info->work);
>>
>> I guess I'm missing something here, but blkif_completion is called by
>> blkif_interrupt, which in turn calls kick_pending_request_queues when
>> finished, which IMHO should be enough to restart the processing of requests.
>>
> 
> You are right, sorry for the mistake.
> 
> The problem we met was a xenblock I/O hang.
> Dumped data showed at that time info->persistent_gnt_c = 8, max_gref = 8
> but block request queue was still stopped.
> It's very hard to reproduce this issue, we only see it once.
> 
> I think there might be a race condition:
> 
> request A  request B:
> 
>info->persistent_gnts_c < max_grefs
>and fail to alloc enough grants
> 
> 
> 
> interrupt happen, blkif_complte():
> info->persistent_gnts_c++
> kick_pending_request_queues()
> 
> stop block request queue
> added to callback list
> 
> If the system don't have enough grants(but have enough persistent_gnts),
> request queue would still hang.

Not sure how can this happen, blkif_queue_request explicitly checks that
persistent_gnts_c < max_grefs before adding the callback and stopping
the request queue, so in your case the queue should not be blocked. Can
you dump the state of info->connected?

Roger.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] Xen 4.5 Development Update (GA slip by a week), GA slip + 1 day

2015-01-12 Thread Lars Kurth

On 9 Jan 2015, at 18:30, Konrad Rzeszutek Wilk  wrote:

> On Fri, Jan 09, 2015 at 01:51:56PM +, Lars Kurth wrote:
>> Note that I published the Acknowledgements and most 4.5 documentation is now 
>> in place. Bits which you may want to check and fix
>> * Spelling of your name if it contains special characters as the scripts 
>> that I use nuke them - 
>> http://wiki.xenproject.org/wiki/Xen_Project_4.5_Acknowledgements
>> * Some additions to 
>> http://wiki.xenproject.org/wiki/Xen_Project_Release_Features, which I will do
>> 
>> And we do need to populate the Known Issues section of 
>> http://wiki.xenproject.org/wiki/Xen_Project_4.5_Release_Notes
> 
> I've added the one that I am aware (systemd).
> 
> In 4.4 we also had this 
> http://wiki.xenproject.org/wiki?title=Xen_Project_4.4_Feature_List
> which was linked of the  
> http://wiki.xenproject.org/wiki/Xen_Project_4.4_Release_Notes
> 
> I can populate most of htem or we can just reference the blog that will be on
> the release day?
Konrad, thank you. You don't need to: I created a separate page - see 
http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List
We can just link to ti from the release notes
Lars


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 12:22,  wrote:
>>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> Sent: Monday, January 12, 2015 6:23 PM
>> One of my main problems with all you recent argumentation here
>> is the arbitrary use of the 1Gb boundary - there's nothing special
>> in this discussion with where the boundary is. Everything revolves
>> around the (undue) effect of report-all on domains not needing all
>> of the ranges found on the host.
>> 
> 
> I'm not sure which part of my argument is not clear here. report-all
> would be a problem here only if we want to fix all the conflictions
> (then pulling unnecessary devices increases the confliction possibility) 
> in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
> while warn other conflictions (e.g. <3G) in domain builder (let later 
> assignment path to actually fail if confliction does matter),

And have no way for the user to (securely) avoid that failure. Plus
the definition of "reasonable" here is of course going to be arbitrary.

Jan

> then we 
> don't need to solve all conflictions in domain builder (if say 1G example
> fixing it may instead reduce lowmem greatly) and then report-all 
> may just add more warnings than report-sel for unused devices.
> 
> Thanks
> Kevin




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Monday, January 12, 2015 7:37 PM
> 
> >>> On 12.01.15 at 12:22,  wrote:
> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: Monday, January 12, 2015 6:23 PM
> >> One of my main problems with all you recent argumentation here
> >> is the arbitrary use of the 1Gb boundary - there's nothing special
> >> in this discussion with where the boundary is. Everything revolves
> >> around the (undue) effect of report-all on domains not needing all
> >> of the ranges found on the host.
> >>
> >
> > I'm not sure which part of my argument is not clear here. report-all
> > would be a problem here only if we want to fix all the conflictions
> > (then pulling unnecessary devices increases the confliction possibility)
> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
> > while warn other conflictions (e.g. <3G) in domain builder (let later
> > assignment path to actually fail if confliction does matter),
> 
> And have no way for the user to (securely) avoid that failure. Plus
> the definition of "reasonable" here is of course going to be arbitrary.
> 
> Jan

actually here I didn't get your point then. It's your proposal to make 
reasonable assumption like below:

---
d) Move down the lowmem RAM/MMIO boundary so that a single,
contiguous chunk of lowmem results, with all other RAM moving up
beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be
considered here, and I think we can reasonably safely assume that
no RMRRs will ever report ranges above 1Mb but below the host
lowmem RAM/MMIO boundary (i.e. we can presumably rest assured
that the lowmem chunk will always be reasonably big).
---

Thanks
Kevin

> 
> > then we
> > don't need to solve all conflictions in domain builder (if say 1G example
> > fixing it may instead reduce lowmem greatly) and then report-all
> > may just add more warnings than report-sel for unused devices.
> >
> > Thanks
> > Kevin
> 
> 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] evtchn: simplify sending of notifications

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 12:33,  wrote:
> On 12/01/15 08:57, Jan Beulich wrote:
>> --- a/xen/include/xen/event.h
>> +++ b/xen/include/xen/event.h
>> @@ -152,10 +152,11 @@ static inline void evtchn_port_init(stru
>>  d->evtchn_port_ops->init(d, evtchn);
>>  }
>>  
>> -static inline void evtchn_port_set_pending(struct vcpu *v,
>> +static inline void evtchn_port_set_pending(struct domain *d,
>> +   unsigned int vcpu_id,
>> struct evtchn *evtchn)
> 
> I would rename this to the, now vacant, evtchn_set_pending().  It takes
> an evtchn* not a port.  (Its sole caller was evtchn_set_pending(), so
> the patch won't grow)

No (and I had actually considered it) - that would get its name out of
sync with all its sibling wrappers.

> Furthermore, all callers except send_guest_vcpu_virq() currently use
> evtchn->notify_vcpu_id to get a struct vcpu* to pass.  I think you can
> drop the vcpu_id parameter and use evtchn->notify_vcpu_id directly,
> which reduces the likelyhood of a bug where the evtchn is bound to one
> vcpu but a caller gets the wrong id and raises the event channel on the
> wrong vcpu.

Generally a nice idea, but it doesn't immediately/obviously fit with
the use in send_guest_vcpu_virq().

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

2015-01-12 Thread Tian, Kevin
> From: Stefano Stabellini
> Sent: Monday, January 12, 2015 7:36 PM
> 
> On Wed, 24 Dec 2014, Liang Li wrote:
> > Use the 'xl pci-attach $DomU $BDF' command to attach more then
> > one PCI devices to the guest, then detach the devices with
> > 'xl pci-detach $DomU $BDF', after that, re-attach these PCI
> > devices again, an error message will be reported like following:
> >
> > libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive
> > an error message from QMP server: Duplicate ID 'pci-pt-03_10.1'
> > for device.
> >
> > The count of calling xen_pt_region_add and xen_pt_region_del are
> > not the same will cause the XenPCIPassthroughState and it's related
> > QemuOpts object not be released properly.
> 
> Thanks for the patch!
> 
> From this description, I don't quite understand why the
> memory_region_ref and memory_region_unref calls are wrong.  What do
> you
> mean by "The count of calling xen_pt_region_add and xen_pt_region_del
> are not the same"?
> 
> On unplug xen_pt_region_del does not get called?
> Or the memory region argument is not exactly the same as the one
> initially passed to xen_pt_region_add?
> 

agree. Liang, could you elaborate how the patch is associated with above 
explanation? :-)

> 
> > Signed-off-by: Liang Li 
> > Reported-by: Longtao Pang 
> > ---
> >  hw/xen/xen_pt.c | 4 
> >  1 file changed, 4 deletions(-)
> >
> > diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
> > index c1bf357..523b8a2 100644
> > --- a/hw/xen/xen_pt.c
> > +++ b/hw/xen/xen_pt.c
> > @@ -588,7 +588,6 @@ static void xen_pt_region_add(MemoryListener *l,
> MemoryRegionSection *sec)
> >  XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
> >   memory_listener);
> >
> > -memory_region_ref(sec->mr);
> >  xen_pt_region_update(s, sec, true);
> >  }
> >
> > @@ -598,7 +597,6 @@ static void xen_pt_region_del(MemoryListener *l,
> MemoryRegionSection *sec)
> >   memory_listener);
> >
> >  xen_pt_region_update(s, sec, false);
> > -memory_region_unref(sec->mr);
> >  }
> >
> >  static void xen_pt_io_region_add(MemoryListener *l,
> MemoryRegionSection *sec)
> > @@ -606,7 +604,6 @@ static void xen_pt_io_region_add(MemoryListener
> *l, MemoryRegionSection *sec)
> >  XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
> >   io_listener);
> >
> > -memory_region_ref(sec->mr);
> >  xen_pt_region_update(s, sec, true);
> >  }
> >
> > @@ -616,7 +613,6 @@ static void xen_pt_io_region_del(MemoryListener *l,
> MemoryRegionSection *sec)
> >   io_listener);
> >
> >  xen_pt_region_update(s, sec, false);
> > -memory_region_unref(sec->mr);
> >  }
> >
> >  static const MemoryListener xen_pt_memory_listener = {
> > --
> > 1.9.1
> >
> 
> ___
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 12:41,  wrote:
>>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> Sent: Monday, January 12, 2015 7:37 PM
>> 
>> >>> On 12.01.15 at 12:22,  wrote:
>> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> Sent: Monday, January 12, 2015 6:23 PM
>> >> One of my main problems with all you recent argumentation here
>> >> is the arbitrary use of the 1Gb boundary - there's nothing special
>> >> in this discussion with where the boundary is. Everything revolves
>> >> around the (undue) effect of report-all on domains not needing all
>> >> of the ranges found on the host.
>> >>
>> >
>> > I'm not sure which part of my argument is not clear here. report-all
>> > would be a problem here only if we want to fix all the conflictions
>> > (then pulling unnecessary devices increases the confliction possibility)
>> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
>> > while warn other conflictions (e.g. <3G) in domain builder (let later
>> > assignment path to actually fail if confliction does matter),
>> 
>> And have no way for the user to (securely) avoid that failure. Plus
>> the definition of "reasonable" here is of course going to be arbitrary.
> 
> actually here I didn't get your point then. It's your proposal to make 
> reasonable assumption like below:
> 
> ---
> d) Move down the lowmem RAM/MMIO boundary so that a single,
> contiguous chunk of lowmem results, with all other RAM moving up
> beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be
> considered here, and I think we can reasonably safely assume that
> no RMRRs will ever report ranges above 1Mb but below the host
> lowmem RAM/MMIO boundary (i.e. we can presumably rest assured
> that the lowmem chunk will always be reasonably big).

Correct - but my point is that this won't work well with your report-all
mechanism, only with the report-sel one.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Ian Campbell
On Fri, 2015-01-09 at 15:27 -0500, Konrad Rzeszutek Wilk wrote:
> On Thu, Jan 08, 2015 at 06:02:04PM +, George Dunlap wrote:
> > On Thu, Jan 8, 2015 at 4:10 PM, Jan Beulich  wrote:
> >  On 08.01.15 at 16:59,  wrote:
> > >> On Thu, Jan 8, 2015 at 1:54 PM, Jan Beulich  wrote:
> >  the 1st invocation of this interface will save all reported reserved
> >  regions under domain structure, and later invocation (e.g. from
> >  hvmloader) gets saved content.
> > >>>
> > >>> Why would the reserved regions need attaching to the domain
> > >>> structure? The combination of (to be) assigned devices and
> > >>> global RMRR list always allow reproducing the intended set of
> > >>> regions without any extra storage.
> > >>
> > >> So when you say "(to be) assigned devices", you mean any device which
> > >> is currently assigned, *or may be assigned at some point in the
> > >> future*?
> > >
> > > Yes.
> > >
> > >> Do you think the extra storage for "this VM might possibly be assigned
> > >> this device at some point" wouldn't really be that much bigger than
> > >> "this VM might possibly map this RMRR at some point in the future"?
> > >
> > > Since listing devices without RMRR association would be pointless,
> > > I think a list of devices would require less storage. But see below.
> > >
> > >> It seems a lot cleaner to me to have the toolstack tell Xen what
> > >> ranges are reserved for RMRR per VM, and then have Xen check again
> > >> when assigning a device to make sure that the RMRRs have already been
> > >> reserved.
> > >
> > > With an extra level of what can be got wrong by the admin.
> > > However, I now realize that doing it this way would allow
> > > specifying regions not associated with any device on the host
> > > the guest boots on, but associated with one on a host the guest
> > > may later migrate to.
> > 
> > I did say the toolstack, not the admin. :-)
> > 
> > At the xl level, I envisioned a single boolean that would say, "Make
> > my memory layout resemble the host system" -- so the MMIO hole would
> > be the same size, and all the RMRRs would be reserved.
> 
> Like the e820_host=1 ? :-)

I'd been thinking about that all the way down this thread ;-) It seems
like a fairly reasonable approach, and the interfaces (e.g. get host
memory e820) are mostly already there. But maybe there are HVM specific
reasons why its not...

Ian.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread George Dunlap
On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin  wrote:
>> From: Jan Beulich [mailto:jbeul...@suse.com]
>> Sent: Monday, January 12, 2015 6:23 PM
>>
>> >>> On 12.01.15 at 11:12,  wrote:
>> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> Sent: Monday, January 12, 2015 6:09 PM
>> >>
>> >> >>> On 12.01.15 at 10:56,  wrote:
>> >> > the result is related to another open whether we want to block guest
>> >> > boot for such problem. If 'warn' in domain builder is acceptable, we
>> >> > don't need to change lowmem for such rare 1GB case, just throws
>> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
>> >> > assign it).
>> >>
>> >> And how would you then deal with the one guest needing that
>> >> range reserved?
>> >
>> > if guest needs the range, then report-all or report-sel doesn't matter.
>> > domain builder throws the warning, and later device assignment will
>> > fail (or warn w/ override). In reality I think 1GB is rare. Making such
>> > assumption to simplify implementation is reasonable.
>>
>> One of my main problems with all you recent argumentation here
>> is the arbitrary use of the 1Gb boundary - there's nothing special
>> in this discussion with where the boundary is. Everything revolves
>> around the (undue) effect of report-all on domains not needing all
>> of the ranges found on the host.
>>
>
> I'm not sure which part of my argument is not clear here. report-all
> would be a problem here only if we want to fix all the conflictions
> (then pulling unnecessary devices increases the confliction possibility)
> in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
> while warn other conflictions (e.g. <3G) in domain builder (let later
> assignment path to actually fail if confliction does matter), then we
> don't need to solve all conflictions in domain builder (if say 1G example
> fixing it may instead reduce lowmem greatly) and then report-all
> may just add more warnings than report-sel for unused devices.

You keep saying "report-all" or "report-sel", but I'm not 100% clear
what you mean by those.  In any case, the naming has got to be a bit
misleading: the important questions at the moment, AFAICT, are:

1. Whether we make holes at boot time for all RMRRs on the system, or
whether only make RMRRs for some subset (or potentially some other
arbitrary range, which may include RMRRs on other hosts to which we
may want to migrate).

2. Whether those holes are made by the domain builder in libxc, or by hvmloader

3. What happens if Xen is asked to assign a device and it finds that
the required RMRR is not empty:
 a. during guest creation
 b. after the guest has booted

Obviously at some point some part of the toolstack needs to identify
which RMRRs go with what device, so that either libxc or hvmloader can
make the appropriate holes in the address space; but at that point,
"report" is not so much the right word as "query".  (Obviously we want
to "report" in the e820 map all RMRRs that we've made holes for in the
guest.)

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: Monday, January 12, 2015 8:03 PM
> 
> >>> On 12.01.15 at 12:41,  wrote:
> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: Monday, January 12, 2015 7:37 PM
> >>
> >> >>> On 12.01.15 at 12:22,  wrote:
> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> >> Sent: Monday, January 12, 2015 6:23 PM
> >> >> One of my main problems with all you recent argumentation here
> >> >> is the arbitrary use of the 1Gb boundary - there's nothing special
> >> >> in this discussion with where the boundary is. Everything revolves
> >> >> around the (undue) effect of report-all on domains not needing all
> >> >> of the ranges found on the host.
> >> >>
> >> >
> >> > I'm not sure which part of my argument is not clear here. report-all
> >> > would be a problem here only if we want to fix all the conflictions
> >> > (then pulling unnecessary devices increases the confliction possibility)
> >> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
> >> > while warn other conflictions (e.g. <3G) in domain builder (let later
> >> > assignment path to actually fail if confliction does matter),
> >>
> >> And have no way for the user to (securely) avoid that failure. Plus
> >> the definition of "reasonable" here is of course going to be arbitrary.
> >
> > actually here I didn't get your point then. It's your proposal to make
> > reasonable assumption like below:
> >
> > ---
> > d) Move down the lowmem RAM/MMIO boundary so that a single,
> > contiguous chunk of lowmem results, with all other RAM moving up
> > beyond 4Gb. Of course RMRRs below the 1Mb boundary must not be
> > considered here, and I think we can reasonably safely assume that
> > no RMRRs will ever report ranges above 1Mb but below the host
> > lowmem RAM/MMIO boundary (i.e. we can presumably rest assured
> > that the lowmem chunk will always be reasonably big).
> 
> Correct - but my point is that this won't work well with your report-all
> mechanism, only with the report-sel one.
> 

I've explained this several times. If there's a violation on above assumption 
from required devices, same for report-all and report-sel. If the violation is 
caused by unnecessary devices, please note I'm proposing 'warn' here so
report-all at most just adds more warnings in domain builder. the conflict
will be caught later if it becomes relevant to be assigned (e.g. thru hotplug).

Thanks
Kevin

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 00/11] Alternate p2m: support multiple copies of host p2m

2015-01-12 Thread Ian Jackson
Ed White writes ("[PATCH 00/11] Alternate p2m: support multiple copies of host 
p2m"):
> This set of patches adds support to hvm domains for EPTP switching
> by creating multiple copies of the host p2m (currently limited to 10
> copies).

Thanks for this.  Did you CC me in my capacity as tools maintainer ?
I can't see anything in this series which deals with any necessary
tools changes.

Are there tools parts to come later ?

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output

2015-01-12 Thread Ian Jackson
Ian Campbell writes ("[OSSTEST PATCH] mg-debian-installer-update: produce 
deterministic output"):
> Currently rerunning mg-debian-install-update when the external files
> have changed still produces differences in the local files produced
  ^
Missing "not" ?

> during post-processing.
> 
> Avoid these differences by:
> 
>   - Using gzip -n, which avoids storing a timestamp in the gzip
> header (as well as the name, which we don't need).
>   - Using pax -M norm, which normalises all timestamps (among other
> things, such as the owner, which we don't care about)
>   - Using tar --mtime, with a reference within the dpkg-deb created
> hierarchy (which has timestamps from the package and is therefore
> dependent only on the downloaded package revision)
> 
> With this the results of two invocations of
> mg-debian-installer-update(-all) are identical (assuming no changes to
> the downloaded files) as demonstrated by runnign this quick hack:

Excellent.  Apart from the missing word in the description,

Acked-by: Ian Jackson 

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 12:13 +, George Dunlap wrote:
> On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin  wrote:
> >> From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: Monday, January 12, 2015 6:23 PM
> >>
> >> >>> On 12.01.15 at 11:12,  wrote:
> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> >> Sent: Monday, January 12, 2015 6:09 PM
> >> >>
> >> >> >>> On 12.01.15 at 10:56,  wrote:
> >> >> > the result is related to another open whether we want to block guest
> >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we
> >> >> > don't need to change lowmem for such rare 1GB case, just throws
> >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
> >> >> > assign it).
> >> >>
> >> >> And how would you then deal with the one guest needing that
> >> >> range reserved?
> >> >
> >> > if guest needs the range, then report-all or report-sel doesn't matter.
> >> > domain builder throws the warning, and later device assignment will
> >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such
> >> > assumption to simplify implementation is reasonable.
> >>
> >> One of my main problems with all you recent argumentation here
> >> is the arbitrary use of the 1Gb boundary - there's nothing special
> >> in this discussion with where the boundary is. Everything revolves
> >> around the (undue) effect of report-all on domains not needing all
> >> of the ranges found on the host.
> >>
> >
> > I'm not sure which part of my argument is not clear here. report-all
> > would be a problem here only if we want to fix all the conflictions
> > (then pulling unnecessary devices increases the confliction possibility)
> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
> > while warn other conflictions (e.g. <3G) in domain builder (let later
> > assignment path to actually fail if confliction does matter), then we
> > don't need to solve all conflictions in domain builder (if say 1G example
> > fixing it may instead reduce lowmem greatly) and then report-all
> > may just add more warnings than report-sel for unused devices.
> 
> You keep saying "report-all" or "report-sel", but I'm not 100% clear
> what you mean by those.

Is the distinction between "all reserved areas" and "only (selectively)
those which are related to an RMRR"? That's how I've been reading it...

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output

2015-01-12 Thread Ian Jackson
Ian Campbell writes ("Re: [Xen-devel] [OSSTEST PATCH] 
mg-debian-installer-update: produce deterministic output"):
> We might like to consider something along these lines for the future:

A reasonable idea (although I wonder if it should be configurable).

Acked-by: Ian Jackson 

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: George Dunlap
> Sent: Monday, January 12, 2015 8:14 PM
> 
> On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin  wrote:
> >> From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: Monday, January 12, 2015 6:23 PM
> >>
> >> >>> On 12.01.15 at 11:12,  wrote:
> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> >> >> Sent: Monday, January 12, 2015 6:09 PM
> >> >>
> >> >> >>> On 12.01.15 at 10:56,  wrote:
> >> >> > the result is related to another open whether we want to block guest
> >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we
> >> >> > don't need to change lowmem for such rare 1GB case, just throws
> >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
> >> >> > assign it).
> >> >>
> >> >> And how would you then deal with the one guest needing that
> >> >> range reserved?
> >> >
> >> > if guest needs the range, then report-all or report-sel doesn't matter.
> >> > domain builder throws the warning, and later device assignment will
> >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such
> >> > assumption to simplify implementation is reasonable.
> >>
> >> One of my main problems with all you recent argumentation here
> >> is the arbitrary use of the 1Gb boundary - there's nothing special
> >> in this discussion with where the boundary is. Everything revolves
> >> around the (undue) effect of report-all on domains not needing all
> >> of the ranges found on the host.
> >>
> >
> > I'm not sure which part of my argument is not clear here. report-all
> > would be a problem here only if we want to fix all the conflictions
> > (then pulling unnecessary devices increases the confliction possibility)
> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
> > while warn other conflictions (e.g. <3G) in domain builder (let later
> > assignment path to actually fail if confliction does matter), then we
> > don't need to solve all conflictions in domain builder (if say 1G example
> > fixing it may instead reduce lowmem greatly) and then report-all
> > may just add more warnings than report-sel for unused devices.
> 
> You keep saying "report-all" or "report-sel", but I'm not 100% clear
> what you mean by those.  In any case, the naming has got to be a bit
> misleading: the important questions at the moment, AFAICT, are:

I explained them in original proposal

> 
> 1. Whether we make holes at boot time for all RMRRs on the system, or
> whether only make RMRRs for some subset (or potentially some other
> arbitrary range, which may include RMRRs on other hosts to which we
> may want to migrate).

I use 'report-all' to stand for making holes for all RMRRs on the system,
while 'report-sel' for specified subset.

including other RMRRs (from admin for migration) is orthogonal to
above open.

> 
> 2. Whether those holes are made by the domain builder in libxc, or by
> hvmloader

based on current discussion, whether to make holes in hvmloader
doesn't bring fundamental difference. as long as domain builder
still need to populate memory (even minimal for hvmloader to boot),
it needs to check conflict and may ideally make hole too (though we
may make assumption not doing that)

> 
> 3. What happens if Xen is asked to assign a device and it finds that
> the required RMRR is not empty:
>  a. during guest creation
>  b. after the guest has booted

for Xen we don't need differentiate a/b. by default it's clear failure
should be returned as it implies a security/correctness issue if
moving forward. but based on discussion an override to 'warn' only
is preferred, so admin can make decision (remains an open on
whether to do global override or per-device override)

> 
> Obviously at some point some part of the toolstack needs to identify
> which RMRRs go with what device, so that either libxc or hvmloader can
> make the appropriate holes in the address space; but at that point,
> "report" is not so much the right word as "query".  (Obviously we want
> to "report" in the e820 map all RMRRs that we've made holes for in the
> guest.)

yes, using 'report' doesn't catch all the changes we need to make. Just
use them to simplify discussion in case all are on the same page. However
clearly my original explanation didn't make it. :/

and state my major intention again. I don't think the preparation (i.e.
detect confliction and make holes) for device assignment should be a 
a blocking failure. Throw warning should be enough (i.e. in libxc). We
should let actual device assignment path to make final call based on
admin's configuration (default 'fail' w/ 'warn' override). Based on that
policy I think 'report-all' (making holes for all host RMRRs) is an
acceptable approach, w/ small impact on possibly more warning 
messages (actually not bad to help admin understand the hotplug
possibility on this platform) and show more reserved regions to the
end user (but he shouldn't make any assumption here). :-)

Thanks
Kevin

Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Tian, Kevin
> Sent: Monday, January 12, 2015 8:29 PM
> 
> > From: George Dunlap
> > Sent: Monday, January 12, 2015 8:14 PM
> >
> > On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin 
> wrote:
> > >> From: Jan Beulich [mailto:jbeul...@suse.com]
> > >> Sent: Monday, January 12, 2015 6:23 PM
> > >>
> > >> >>> On 12.01.15 at 11:12,  wrote:
> > >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
> > >> >> Sent: Monday, January 12, 2015 6:09 PM
> > >> >>
> > >> >> >>> On 12.01.15 at 10:56,  wrote:
> > >> >> > the result is related to another open whether we want to block guest
> > >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we
> > >> >> > don't need to change lowmem for such rare 1GB case, just throws
> > >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
> > >> >> > assign it).
> > >> >>
> > >> >> And how would you then deal with the one guest needing that
> > >> >> range reserved?
> > >> >
> > >> > if guest needs the range, then report-all or report-sel doesn't matter.
> > >> > domain builder throws the warning, and later device assignment will
> > >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such
> > >> > assumption to simplify implementation is reasonable.
> > >>
> > >> One of my main problems with all you recent argumentation here
> > >> is the arbitrary use of the 1Gb boundary - there's nothing special
> > >> in this discussion with where the boundary is. Everything revolves
> > >> around the (undue) effect of report-all on domains not needing all
> > >> of the ranges found on the host.
> > >>
> > >
> > > I'm not sure which part of my argument is not clear here. report-all
> > > would be a problem here only if we want to fix all the conflictions
> > > (then pulling unnecessary devices increases the confliction possibility)
> > > in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
> > > while warn other conflictions (e.g. <3G) in domain builder (let later
> > > assignment path to actually fail if confliction does matter), then we
> > > don't need to solve all conflictions in domain builder (if say 1G example
> > > fixing it may instead reduce lowmem greatly) and then report-all
> > > may just add more warnings than report-sel for unused devices.
> >
> > You keep saying "report-all" or "report-sel", but I'm not 100% clear
> > what you mean by those.  In any case, the naming has got to be a bit
> > misleading: the important questions at the moment, AFAICT, are:
> 
> I explained them in original proposal
> 
> >
> > 1. Whether we make holes at boot time for all RMRRs on the system, or
> > whether only make RMRRs for some subset (or potentially some other
> > arbitrary range, which may include RMRRs on other hosts to which we
> > may want to migrate).
> 
> I use 'report-all' to stand for making holes for all RMRRs on the system,
> while 'report-sel' for specified subset.
> 

more accurate... 'report-sel' for making holes for RMRRs belonging to 
specified devices.

Thanks
Kevin
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [OSSTEST PATCH] mg-debian-installer-update: produce deterministic output

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 12:27 +, Ian Jackson wrote:
> Ian Campbell writes ("Re: [Xen-devel] [OSSTEST PATCH] 
> mg-debian-installer-update: produce deterministic output"):
> > We might like to consider something along these lines for the future:
> 
> A reasonable idea (although I wonder if it should be configurable).

I did consider just inserting a $CURL_OPTS which could be set to include
pragma no-cache from the command line. I mostly didn't because I was too
lazy to think about the correct shell quoting at the time...

> 
> Acked-by: Ian Jackson 
> 
> Ian.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] xl only waits 33 seconds for ballooning to complete

2015-01-12 Thread George Dunlap
On Thu, Jan 8, 2015 at 1:11 AM, Mike Latimer  wrote:
> On Wednesday, January 07, 2015 09:38:31 AM Ian Campbell wrote:
>> That's exactly what I was about to suggest as I read the penultimate
>> paragraph, i.e. keep waiting so long as some reasonable delta occurs on
>> each iteration.
>
> Thanks, Ian.
>
> I wonder if there is a future-safe threshold on the amount of delta that
> indicates progress is being made. Should some minimum safe progress amount or
> percentage be set, or is it better to just make sure free memory is increasing
> at the end of each iteration of the loop?
>
> For example, the following simple change just tracks free_memkb and only
> decrements the retry count if it has not increased since the last check:
>
> --
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index ed0d478..4cf2991 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -2196,7 +2196,7 @@ static int preserve_domain(uint32_t *r_domid,
> libxl_event *event,
>  static int freemem(uint32_t domid, libxl_domain_build_info *b_info)
>  {
>  int rc, retries = 3;
> -uint32_t need_memkb, free_memkb;
> +uint32_t need_memkb, free_memkb, free_memkb_prev = 0;
>
>  if (!autoballoon)
>  return 0;
> @@ -2229,7 +2229,10 @@ static int freemem(uint32_t domid,
> libxl_domain_build_info *b_info)
>  if (rc < 0)
>  return rc;
>
> -retries--;
> +/* only decrement retry count if free_memkb is not increasing */
> +if (free_memkb <= free_memkb_prev)
> +retries--;
> +free_memkb_prev = free_memkb;

I would:
1. Reset the retries after a successful increase
2. Not allow free_memkb_prev to go down.

So maybe something like the following?

 if (free_memkb <= free_memkb_prev) {
   retries--;
 } else {
   retries = MAX_RETRIES;
   free_memkb_prev = free_memkb;
 }

I'm inclined to say we could add an option to say "wait forever", or
to increase the period of the checks; but ultimately at some point
someone (either xl or the human) needs to timeout and say, "This is
never going to finish".  10s seems like a very conservative default.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [xen-4.5-testing test] 33303: regressions - FAIL

2015-01-12 Thread Ian Jackson
xen.org writes ("[xen-4.5-testing test] 33303: regressions - FAIL"):
> flight 33303 xen-4.5-testing real [real]
> http://www.chiark.greenend.org.uk/~xensrcts/logs/33303/
> 
> Regressions :-(
> 
> Tests which did not succeed and are blocking,
> including tests which could not be run:
>  test-amd64-i386-qemuu-rhel6hvm-intel  7 redhat-installfail REGR. vs. 
> 33264

osstest is very badly overloaded right now so everything is taking a
long time.

To see where the release was at I peeked at the results of the
currently running test (flight 33348) and it looks like it's going to
justify a pass.  I also compared its results to 33285 (a not-too-bad
xen-unstable flight before we branched) and things look reasonably
good there too.

So I think we are going to be good to go.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 13:16,  wrote:
> I've explained this several times. If there's a violation on above 
> assumption 
> from required devices, same for report-all and report-sel. If the violation 
> is 
> caused by unnecessary devices, please note I'm proposing 'warn' here so
> report-all at most just adds more warnings in domain builder. the conflict
> will be caught later if it becomes relevant to be assigned (e.g. thru 
> hotplug).

Since we're apparently not understanding one another, please
explain with a suitable example how you envision things to behave.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c

2015-01-12 Thread David Vrabel
On 12/01/15 11:36, Roger Pau Monné wrote:
> El 12/01/15 a les 8.09, Bob Liu ha escrit:
>>
>> On 01/09/2015 11:51 PM, Roger Pau Monné wrote:
>>> El 06/01/15 a les 14.19, Bob Liu ha escrit:
 When there is no enough free grants, gnttab_alloc_grant_references()
 will fail and block request queue will stop.
 If the system is always lack of grants, blkif_restart_queue_callback() 
 can't be
 scheduled and block request queue can't be restart(block I/O hang).

 But when there are former requests complete, some grants may free to
 persistent_gnts_c, we can give the request queue another chance to restart 
 and
 avoid block hang.

 Reported-by: Junxiao Bi 
 Signed-off-by: Bob Liu 
 ---
  drivers/block/xen-blkfront.c |   11 +++
  1 file changed, 11 insertions(+)

 diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
 index 2236c6f..dd30f99 100644
 --- a/drivers/block/xen-blkfront.c
 +++ b/drivers/block/xen-blkfront.c
 @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, 
 struct blkfront_info *info,
}
}
}
 +
 +  /*
 +   * Request queue would be stopped if failed to alloc enough grants and
 +   * won't be restarted until gnttab_free_count >= info->callback->count.
 +   *
 +   * But there is another case, once we have enough persistent grants we
 +   * can try to restart the request queue instead of continue to wait for
 +   * 'gnttab_free_count'.
 +   */
 +  if (info->persistent_gnts_c >= info->callback.count)
 +  schedule_work(&info->work);
>>>
>>> I guess I'm missing something here, but blkif_completion is called by
>>> blkif_interrupt, which in turn calls kick_pending_request_queues when
>>> finished, which IMHO should be enough to restart the processing of requests.
>>>
>>
>> You are right, sorry for the mistake.
>>
>> The problem we met was a xenblock I/O hang.
>> Dumped data showed at that time info->persistent_gnt_c = 8, max_gref = 8
>> but block request queue was still stopped.
>> It's very hard to reproduce this issue, we only see it once.
>>
>> I think there might be a race condition:
>>
>> request A  request B:
>>
>>info->persistent_gnts_c < max_grefs
>>and fail to alloc enough grants
>>
>>
>> 
>> interrupt happen, blkif_complte():
>> info->persistent_gnts_c++
>> kick_pending_request_queues()
>>
>> stop block request queue
>> added to callback list
>>
>> If the system don't have enough grants(but have enough persistent_gnts),
>> request queue would still hang.
> 
> Not sure how can this happen, blkif_queue_request explicitly checks that
> persistent_gnts_c < max_grefs before adding the callback and stopping
> the request queue, so in your case the queue should not be blocked. Can
> you dump the state of info->connected?

I think Bob has correctly identified a race.

After calling blk_stop_queue(), check info->persistent_gnts again and
restart the queue if there free grefs.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 01/11] VMX: VMFUNC and #VE definitions and detection.

2015-01-12 Thread Andrew Cooper
On 09/01/15 21:26, Ed White wrote:
> Currently, neither is enabled globally but may be enabled on a per-VCPU
> basis by the altp2m code.
>
> Everything can be force-disabled globally by specifying vmfunc=0 on the
> Xen command line.
>
> Remove the check for EPTE bit 63 == zero in ept_split_super_page(), as
> that bit is now hardware-defined.
>
> Signed-off-by: Ed White 
> ---
>  docs/misc/xen-command-line.markdown |  7 +++
>  xen/arch/x86/hvm/vmx/vmcs.c | 40 
> +
>  xen/arch/x86/mm/p2m-ept.c   |  1 -
>  xen/include/asm-x86/hvm/vmx/vmcs.h  | 16 +++
>  xen/include/asm-x86/hvm/vmx/vmx.h   | 13 +++-
>  xen/include/asm-x86/msr-index.h |  1 +
>  6 files changed, 76 insertions(+), 2 deletions(-)
>
> diff --git a/docs/misc/xen-command-line.markdown 
> b/docs/misc/xen-command-line.markdown
> index 152ae03..00fbae7 100644
> --- a/docs/misc/xen-command-line.markdown
> +++ b/docs/misc/xen-command-line.markdown
> @@ -1305,6 +1305,13 @@ The optional `keep` parameter causes Xen to continue 
> using the vga
>  console even after dom0 has been started.  The default behaviour is to
>  relinquish control to dom0.
>  
> +### vmfunc (Intel)
> +> `= `
> +
> +> Default: `true`
> +
> +Use VMFUNC and #VE support if available.
> +
>  ### vpid (Intel)
>  > `= `
>  
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index 9d8033e..4274e92 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -50,6 +50,9 @@ boolean_param("unrestricted_guest", 
> opt_unrestricted_guest_enabled);
>  static bool_t __read_mostly opt_apicv_enabled = 1;
>  boolean_param("apicv", opt_apicv_enabled);
>  
> +static bool_t __read_mostly opt_vmfunc_enabled = 1;
> +boolean_param("vmfunc", opt_vmfunc_enabled);

Please can experimental features be off by default.  (I am specifically
looking to avoid the issues we had with apicv getting into stable
releases despite reliably causing problems for migration).

I suspect you will have many interested testers for this featureset, and
it is fine to patch the default later when the feature gets declared stable.

I also wonder whether it might be better to have a "vmx=" command line
parameter with "vmfunc" as a subopt, to save gaining an ever increasing
set of related top level parameters?

Other than this, the content of the rest of the patch appears fine.

~Andrew


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c

2015-01-12 Thread Jiang Liu
Commit b81975eade8c ("x86, irq: Clean up irqdomain transition code")
breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke
setup_IO_APIC(), so no irqdomains created for IOAPICs and
mp_map_pin_to_irq() fails at the very beginning.

Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize
irqdomain for IOAPICs.

Signed-off-by: Jiang Liu 
---
Hi Sander,
Could you please help to test this patch against 3.19-rc3?
I have reworked it based Konrad's suggestions.
Thanks!
Gerry
---
 arch/x86/kernel/apic/io_apic.c |   31 +++
 arch/x86/xen/smp.c |1 +
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3f5f60406ab1..81d4faeb8040 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -38,6 +38,8 @@
 #include 
 #include 
 
+#include 
+
 #include 
 #include 
 #include 
@@ -2165,6 +2167,9 @@ static inline void __init check_timer(void)
unsigned long flags;
int no_pin1 = 0;
 
+   if (!nr_legacy_irqs())
+   return;
+
local_irq_save(flags);
 
/*
@@ -2185,6 +2190,13 @@ static inline void __init check_timer(void)
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
legacy_pic->init(1);
 
+   /*
+* legacy_pic will be changed to null_legacy_pic if init() fails to
+* to detect legacy PIC hardware. Recheck again.
+*/
+   if (!nr_legacy_irqs())
+   goto out;
+
pin1  = find_isa_irq_pin(0, mp_INT);
apic1 = find_isa_irq_apic(0, mp_INT);
pin2  = ioapic_i8259.pin;
@@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx)
ioapics[idx].pin_info = NULL;
 }
 
+static void setup_IO_APIC_IDs(void)
+{
+   if (xen_domain())
+   return;
+
+   x86_init.mpparse.setup_ioapic_ids();
+   sync_Arb_IDs();
+}
+
 void __init setup_IO_APIC(void)
 {
int ioapic;
@@ -2382,16 +2403,10 @@ void __init setup_IO_APIC(void)
for_each_ioapic(ioapic)
BUG_ON(mp_irqdomain_create(ioapic));
 
-   /*
- * Set up IO-APIC IRQ routing.
- */
-   x86_init.mpparse.setup_ioapic_ids();
-
-   sync_Arb_IDs();
+   setup_IO_APIC_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
-   if (nr_legacy_irqs())
-   check_timer();
+   check_timer();
 
ioapic_initialized = 1;
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4c071aeb8417..9f404df64422 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -327,6 +327,7 @@ static void __init xen_smp_prepare_cpus(unsigned int 
max_cpus)
xen_raw_printk(m);
panic(m);
}
+   setup_IO_APIC();
xen_init_lock_cpu(0);
 
smp_store_boot_cpu_info();
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

2015-01-12 Thread Li, Liang Z
> > > Use the 'xl pci-attach $DomU $BDF' command to attach more than one
> > > PCI devices to the guest, then detach the devices with 'xl
> > > pci-detach $DomU $BDF', after that, re-attach these PCI devices
> > > again, an error message will be reported like following:
> > >
> > > libxl: error: libxl_qmp.c:287:qmp_handle_error_response: receive an
> > > error message from QMP server: Duplicate ID 'pci-pt-03_10.1'
> > > for device.
> > >
> > > The count of calling xen_pt_region_add and xen_pt_region_del are not
> > > the same will cause the XenPCIPassthroughState and it's related
> > > QemuOpts object not be released properly.
> >
> > Thanks for the patch!
> >
> > From this description, I don't quite understand why the
> > memory_region_ref and memory_region_unref calls are wrong.  What do
> > you mean by "The count of calling xen_pt_region_add and
> > xen_pt_region_del are not the same"?

I means for some memory regions , only the xen_pt_region_add callback function
was called while the xen_pt_region_del was not called.

> > On unplug xen_pt_region_del does not get called?
> > Or the memory region argument is not exactly the same as the one
> > initially passed to xen_pt_region_add?
> >
> 
> agree. Liang, could you elaborate how the patch is associated with above
> explanation? :-)


I have verified the following patch can work too:

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d)
 }
 
 out:
-memory_listener_register(&s->memory_listener, &address_space_memory);
+memory_listener_register(&s->memory_listener, 
+ &s->dev.bus_master_as);
 memory_listener_register(&s->io_listener, &address_space_io);
 XEN_PT_LOG(d,
"Real physical device %02x:%02x.%d registered successfully!\n",

By further debugging, I found when using 'address_space_memory',  
'xen_pt_region_del' 
won't be called when the memory region's name is not  ' xen-pci-pt-*', when 
using
 ' s->dev.bus_master_as ', there is no such issue.

I think use the device related address space here is more reasonable, but I am 
not sure.
 Could you give some suggestion?

Liang

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Ian Campbell
On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote:
> 3) report-sel vs. report-all

One thing I'm not clear on is whether you are suggesting to reserve RMRR
(either -all or -sel) for every domain by default, or whether the guest
CFG will need to explicitly opt-in, IOW is there a 3rd report-none
option which is the default unless otherwise requested (e.g. by
e820_host=1, or some other new option)?

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote:
> - don't bail when using the last slot of bootinfo.mem.bank[] (due to
>   premature incrementing of the array index)
> - GUIDs should be static const (and placed into .init.* whenever
>   possible)
> - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
>   append one to the message passed to the function
  - Avoid needless use of DisplayUint via __stringify.

> Signed-off-by: Jan Beulich 

Acked-by: Ian Campbell 

Will you commit yourself or would you like me to?

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Tian, Kevin
> From: Ian Campbell [mailto:ian.campb...@citrix.com]
> Sent: Monday, January 12, 2015 9:42 PM
> 
> On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote:
> > 3) report-sel vs. report-all
> 
> One thing I'm not clear on is whether you are suggesting to reserve RMRR
> (either -all or -sel) for every domain by default, or whether the guest
> CFG will need to explicitly opt-in, IOW is there a 3rd report-none
> option which is the default unless otherwise requested (e.g. by
> e820_host=1, or some other new option)?

only when a device is assigned (or potentially prepare for hotplug usage),
and report-all/sel is from hypervisor p.o.v to tell userspace about all RMRR
regions on this platform or just RMRR regions belonging to specified devices.
'e820_host' only makes holes to prepare (more than RMRR requires). finally 
we still need query actual reserved regions reported by hypervisor and 
then mark them reserved in guest e820 table and avoid conflict for PCI BAR 
allocation etc. 

Thanks
Kevin
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [RFC V9 2/4] domain snapshot overview

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 00:01 -0700, Chun Yan Liu wrote:
> 
> >>> On 1/8/2015 at 08:26 PM, in message 
> >>> <1420719995.19787.62.ca...@citrix.com>, Ian
> Campbell  wrote: 
> > On Mon, 2014-12-22 at 20:42 -0700, Chun Yan Liu wrote: 
> > >  
> > > >>> On 12/19/2014 at 06:25 PM, in message  
> > <1418984720.20028.15.ca...@citrix.com>, 
> > > Ian Campbell  wrote:  
> > > > On Thu, 2014-12-18 at 22:45 -0700, Chun Yan Liu wrote:  
> > > > >   
> > > > > >>> On 12/18/2014 at 11:10 PM, in message   
> > > > <1418915443.11882.86.ca...@citrix.com>,  
> > > > > Ian Campbell  wrote:   
> > > > > > On Tue, 2014-12-16 at 14:32 +0800, Chunyan Liu wrote:   
> > > > > > > Changes to V8:   
> > > > > > >   * add an overview document, so that one can has a overall look  
> > > > > > >  
> > > > > > > about the whole domain snapshot work, limits, requirements,   
> > > > > > > how to do, etc.   
> > > > > > >
> > > > > > > =
> > > > > > >
> > > > > > > Domain snapshot overview   
> > > > > >
> > > > > > I don't see a similar section for disk snapshots, are you not   
> > > > > > considering those here except as a part of a domain snapshot or is 
> > > > > > this   
> >  
> > > > > > an oversight?   
> > > > > >
> > > > > > There are three main use cases (that I know of at least) for   
> > > > > > snapshotting like behaviour.   
> > > > > >
> > > > > > One is as you've mentioned below for "backup", i.e. to preserve the 
> > > > > > VM   
> > > > > > at a certain point in time in order to be able to roll back to it. 
> > > > > > Is   
> > > > > > this the only usecase you are considering?   
> > > > >   
> > > > > Yes. I didn't take disk snapshot thing into the scope.  
> > > > >   
> > > > > >
> > > > > > A second use case is to support "gold image" type deployments, i.e. 
> > > > > >   
> > > > > > where you create one baseline single disk image and then clone it   
> > > > > > multiple times to deploy lots of guests. I think this is usually a 
> > > > > > "disk  
> >   
> > > > > > snapshot" type thing, but maybe it can be implemented as restoring 
> > > > > > a   
> > > > > > gold domain snapshot multiple times (e.g. for start of day 
> > > > > > performance   
> > > > > > reasons).   
> > > > >   
> > > > > As we initially discussed about the thing, disk snapshot thing can be 
> > > > >  
> > done  
> > > > > be existing tools directly like qemu-img, vhd-util.  
> > > >   
> > > > I was reading this section as a more generic overview of snapshotting,  
> > > > without reference to where/how things might ultimately be implemented.  
> > > >   
> > > > From a design point of view it would be useful to cover the various use 
> > > >  
> > > > cases, even if the solution is that the user implements them using CLI  
> > > > tools by hand (xl) or the toolstack does it for them internally  
> > > > (libvirt).  
> > > >   
> > > > This way we can more clearly see the full picture, which allows us to  
> > > > validate that we are making the right choices about what goes where.  
> > >  
> > > OK. I see. I think this user case is more like how to use the snapshot,  
> > rather 
> > > than how to implement snapshot. Right? 
> >  
> > Correct, what the user is actually trying to achieve with the 
> > functionality. 
> >  
> > > 'Gold image' or 'Gold domain', the needed work is more like cloning 
> > > disks. 
> >  
> > Yes, or resuming multiple times. 
> 
> I see. But IMO it doesn't need change in snapshot design and implementation.
> Even resuming multiple times, they couldn't use the same image but duplicate
> the image multiple times.

Perhaps, but the use case should be included so that this rationale for
not worrying about it can be written down (so that people like me don't
keep asking...) 

> 
> >  
> > > > > > The third case, (which is similar to the first), is taking a disk   
> > > > > > snapshot in order to be able to run you usual backup software on 
> > > > > > the   
> > > > > > snapshot (which is now unchanging, which is handy) and then 
> > > > > > deleting the  
> >   
> > > > > > disk snapshot (this differs from the first case in which disk is 
> > > > > > active   
> >  
> > > > > > after the snapshot, and due to the lack of the memory part).   
> > > > >   
> > > > > Sorry, I'm still not quite clear about what this user case wants to 
> > > > > do.  
> > > >   
> > > > The user has an active domain which they want to backup, but backup  
> > > > software often does not cope well if the data is changing under its  
> > > > feet.  
> > > >   
> > > > So the users wants to take a snapshot of the domains disks while 
> > > > leaving  
> > > > the domain running, so they can backup that static version of the disk  
> > > > out of band from the VM itself (e.g. by attaching it to a separate  
> > > > backup VM).  
> > >  
> > > Got it. So that's simply disk-only snapshot when domian is active. As you 
> > > mentioned below

Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Pasi Kärkkäinen
On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote:
> On Fri, Jan 9, 2015 at 2:43 AM, Tian, Kevin  wrote:
> >> From: George Dunlap
> >> Sent: Thursday, January 08, 2015 8:55 PM
> >>
> >> On Thu, Jan 8, 2015 at 12:49 PM, George Dunlap
> >>  wrote:
> >> > If RMRRs almost always happen up above 2G, for example, then a simple
> >> > solution that wouldn't require too much work would be to make sure
> >> > that the PCI MMIO hole we specify to libxc and to qemu-upstream is big
> >> > enough to include all RMRRs.  That would satisfy the libxc and qemu
> >> > requirements.
> >> >
> >> > If we then store specific RMRRs we want included in xenstore,
> >> > hvmloader can put them in the e820 map, and that would satisfy the
> >> > hvmloader requirement.
> >>
> >> An alternate thing to do here would be to "properly" fix the
> >> qemu-upstream problem, by making a way for hvmloader to communicate
> >> changes in the gpfn layout to qemu.
> >>
> >> Then hvmloader could do the work of moving memory under RMRRs to
> >> higher memory; and libxc wouldn't need to be involved at all.
> >>
> >> I think it would also fix our long-standing issues with assigning PCI
> >> devices to qemu-upstream guests, which up until now have only been
> >> worked around.
> >>
> >
> > could you elaborate a bit for that long-standing issue?
> 
> So qemu-traditional didn't particularly expect to know the guest
> memory layout.  qemu-upstream does; it expects to know what areas of
> memory are guest memory and what areas of memory are unmapped.  If a
> read or write happens to a gpfn which *xen* knows is valid, but which
> *qemu-upstream* thinks is unmapped, then qemu-upstream will crash.
> 
> The problem though is that the guest's memory map is not actually
> communicated to qemu-upstream in any way.  Originally, qemu-upstream
> was only told how much memory the guest had, and it just "happens" to
> choose the same guest memory layout as the libxc domain builder does.
> This works, but it is bad design, because if libxc were to change for
> some reason, people would have to simply remember to also change the
> qemu-upstream layout.
> 
> Where this really bites us is in PCI pass-through.  The default <4G
> MMIO hole is very small; and hvmloader naturally expects to be able to
> make this area larger by relocating memory from below 4G to above 4G.
> It moves the memory in Xen's p2m, but it has no way of communicating
> this to qemu-upstream.  So when the guest does an MMIO instuction that
> causes qemu-upstream to access that memory, the guest crashes.
> 
> There are two work-arounds at the moment:
> 1. A flag which tells hvmloader not to relocate memory
> 2. The option to tell qemu-upstream to make the memory hole larger.
> 
> Both are just work-arounds though; a "proper fix" would be to allow
> hvmloader some way of telling qemu that the memory has moved, so it
> can update its memory map.
> 
> This will (I'm pretty sure) have an effect on RMRR regions as well,
> for the reasons I've mentioned above: whether make the "holes" for the
> RMRRs in libxc or in hvmloader, if we *move* that memory up to the top
> of the address space (rather than, say, just not giving that RAM to
> the guest), then qemu-upstream's idea of the guest memory map will be
> wrong, and will probably crash at some point.
> 
> Having the ability for hvmloader to populate and/or move the memory
> around, and then tell qemu-upstream what the resulting map looked
> like, would fix both the MMIO-resize issue and the RMRR problem, wrt
> qemu-upstream.
> 

Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz?

You can now specify the mmio_hole size for HVM guests when using qemu-upstream:
http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List


"Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config, 
which allows configuring the MMIO size below 4GB. "

"Backport pc & q35: Add new machine opt max-ram-below-4g":
http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458

"x86: hvm: Allow configuration of the size of the mmio_hole.":
http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7


-- Pasi


>  -George
> 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 13:53 +, Tian, Kevin wrote:
> > From: Ian Campbell [mailto:ian.campb...@citrix.com]
> > Sent: Monday, January 12, 2015 9:42 PM
> > 
> > On Fri, 2015-01-09 at 06:57 +, Tian, Kevin wrote:
> > > 3) report-sel vs. report-all
> > 
> > One thing I'm not clear on is whether you are suggesting to reserve RMRR
> > (either -all or -sel) for every domain by default, or whether the guest
> > CFG will need to explicitly opt-in, IOW is there a 3rd report-none
> > option which is the default unless otherwise requested (e.g. by
> > e820_host=1, or some other new option)?
> 
> only when a device is assigned (or potentially prepare for hotplug usage),

How is this triggered though? Via pci= non-empty in the guest CFG?

This sounds like it should behave similarly (in terms of when it is
enabled, not necessarily in functionality) to the e820_host boolean,
which is set by default if the pci= list is not empty, or can be
overridden if the host admin anticipates doing hotplug.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] arm64/EFI: minor corrections

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 14:46,  wrote:
> On Mon, 2015-01-12 at 08:59 +, Jan Beulich wrote:
>> - don't bail when using the last slot of bootinfo.mem.bank[] (due to
>>   premature incrementing of the array index)
>> - GUIDs should be static const (and placed into .init.* whenever
>>   possible)
>> - PrintErrMsg() issues a CR/LF pair itself - no need to explicitly
>>   append one to the message passed to the function
>   - Avoid needless use of DisplayUint via __stringify.
> 
>> Signed-off-by: Jan Beulich 
> 
> Acked-by: Ian Campbell 
> 
> Will you commit yourself or would you like me to?

I will.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [qemu-mainline bisection] complete test-amd64-amd64-xl-qemuu-win7-amd64

2015-01-12 Thread xen . org
branch xen-unstable
xen branch xen-unstable
job test-amd64-amd64-xl-qemuu-win7-amd64
test windows-install

Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://git.qemu.org/qemu.git
Tree: xen git://xenbits.xen.org/xen.git

*** Found and reproduced problem changeset ***

  Bug is in tree:  qemuu git://git.qemu.org/qemu.git
  Bug introduced:  49d2e648e8087d154d8bf8b91f27c8e05e79d5a6
  Bug not present: 60fb1a87b47b14e4ea67043aa56f353e77fbd70a


  commit 49d2e648e8087d154d8bf8b91f27c8e05e79d5a6
  Author: Marcel Apfelbaum 
  Date:   Tue Dec 16 16:58:05 2014 +
  
  machine: remove qemu_machine_opts global list
  
  QEMU has support for options per machine, keeping
  a global list of options is no longer necessary.
  
  Signed-off-by: Marcel Apfelbaum 
  Reviewed-by: Alexander Graf 
  Reviewed-by: Greg Bellows 
  Message-id: 1418217570-15517-2-git-send-email-marce...@redhat.com
  Signed-off-by: Peter Maydell 


For bisection revision-tuple graph see:
   
http://www.chiark.greenend.org.uk/~xensrcts/results/bisect.qemu-mainline.test-amd64-amd64-xl-qemuu-win7-amd64.windows-install.html
Revision IDs in each graph node refer, respectively, to the Trees above.


Searching for failure / basis pass:
 33123 fail [host=itch-mite] / 32598 ok.
Failure / basis pass flights: 33123 / 32598
(tree with no url: seabios)
Tree: linux git://xenbits.xen.org/linux-pvops.git
Tree: linuxfirmware git://xenbits.xen.org/osstest/linux-firmware.git
Tree: qemu git://xenbits.xen.org/staging/qemu-xen-unstable.git
Tree: qemuu git://git.qemu.org/qemu.git
Tree: xen git://xenbits.xen.org/xen.git
Latest 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
Basis pass 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
7e58e2ac7778cca3234c33387e49577bb7732714 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
Generating revisions with ./adhoc-revtuple-generator  
git://xenbits.xen.org/linux-pvops.git#83a926f7a4e39fb6be0576024e67fe161593defa-83a926f7a4e39fb6be0576024e67fe161593defa
 
git://xenbits.xen.org/osstest/linux-firmware.git#c530a75c1e6a472b0eb9558310b518f0dfcd8860-c530a75c1e6a472b0eb9558310b518f0dfcd8860
 
git://xenbits.xen.org/staging/qemu-xen-unstable.git#b0d42741f8e9a00854c3b3faca1da84bfc69bf22-b0d42741f8e9a00854c3b3faca1da84bfc69bf22
 
git://git.qemu.org/qemu.git#7e58e2ac7778cca3234c33387e49577bb7732714-ab0302ee764fd702465aef6d88612cdff4302809
 
git://xenbits.xen.org/xen.git#36174af3fbeb1b662c0eadbfa193e77f68cc955b-36174af3fbeb1b662c0eadbfa193e77f68cc955b
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/qemu
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
+ exec
+ sh -xe
+ cd /export/home/osstest/repos/qemu
+ git remote set-url origin 
git://drall.uk.xensource.com:9419/git://git.qemu.org/qemu.git
+ git fetch -p origin +refs/heads/*:refs/remotes/origin/*
Loaded 1005 nodes in revision graph
Searching for test results:
 32585 pass irrelevant
 32598 pass 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
7e58e2ac7778cca3234c33387e49577bb7732714 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32611 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32626 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32689 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32659 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32876 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32854 fail 83a926f7a4e39fb6be0576024e67fe161593defa 
c530a75c1e6a472b0eb9558310b518f0dfcd8860 
b0d42741f8e9a00854c3b3faca1da84bfc69bf22 
ab0302ee764fd702465aef6d88612cdff4302809 
36174af3fbeb1b662c0eadbfa193e77f68cc955b
 32908 fail 83a926f7a4e39fb6be0576024e67fe161

Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects

2015-01-12 Thread Ian Campbell
On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote:
> In particular the .rodata.str2 case is relevant for the EFI boot code
> (moving around 3k from permanent to init-time sections).

So we go from handling .rodata.str1.{1,2,3,8}
to .rodata.str{1,2,4}.{1,2,4,8,16}?

> Signed-off-by: Jan Beulich 

Acked-by: Ian Campbell 



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] extend list of sections convertible to .init.* ones in init-only objects

2015-01-12 Thread Jan Beulich
>>> On 12.01.15 at 15:03,  wrote:
> On Mon, 2015-01-12 at 09:00 +, Jan Beulich wrote:
>> In particular the .rodata.str2 case is relevant for the EFI boot code
>> (moving around 3k from permanent to init-time sections).
> 
> So we go from handling .rodata.str1.{1,2,3,8}
> to .rodata.str{1,2,4}.{1,2,4,8,16}?

These plus .rodata.cst{1,2,4,8,16}.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread George Dunlap
On Mon, Jan 12, 2015 at 12:28 PM, Tian, Kevin  wrote:
>> From: George Dunlap
>> Sent: Monday, January 12, 2015 8:14 PM
>>
>> On Mon, Jan 12, 2015 at 11:22 AM, Tian, Kevin  wrote:
>> >> From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> Sent: Monday, January 12, 2015 6:23 PM
>> >>
>> >> >>> On 12.01.15 at 11:12,  wrote:
>> >> >>  From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> >> Sent: Monday, January 12, 2015 6:09 PM
>> >> >>
>> >> >> >>> On 12.01.15 at 10:56,  wrote:
>> >> >> > the result is related to another open whether we want to block guest
>> >> >> > boot for such problem. If 'warn' in domain builder is acceptable, we
>> >> >> > don't need to change lowmem for such rare 1GB case, just throws
>> >> >> > a warning for unnecessary conflictions (doesn't hurt if user doesn't
>> >> >> > assign it).
>> >> >>
>> >> >> And how would you then deal with the one guest needing that
>> >> >> range reserved?
>> >> >
>> >> > if guest needs the range, then report-all or report-sel doesn't matter.
>> >> > domain builder throws the warning, and later device assignment will
>> >> > fail (or warn w/ override). In reality I think 1GB is rare. Making such
>> >> > assumption to simplify implementation is reasonable.
>> >>
>> >> One of my main problems with all you recent argumentation here
>> >> is the arbitrary use of the 1Gb boundary - there's nothing special
>> >> in this discussion with where the boundary is. Everything revolves
>> >> around the (undue) effect of report-all on domains not needing all
>> >> of the ranges found on the host.
>> >>
>> >
>> > I'm not sure which part of my argument is not clear here. report-all
>> > would be a problem here only if we want to fix all the conflictions
>> > (then pulling unnecessary devices increases the confliction possibility)
>> > in the domain builder. but if we only fix reasonable ones (e.g. >3GB)
>> > while warn other conflictions (e.g. <3G) in domain builder (let later
>> > assignment path to actually fail if confliction does matter), then we
>> > don't need to solve all conflictions in domain builder (if say 1G example
>> > fixing it may instead reduce lowmem greatly) and then report-all
>> > may just add more warnings than report-sel for unused devices.
>>
>> You keep saying "report-all" or "report-sel", but I'm not 100% clear
>> what you mean by those.  In any case, the naming has got to be a bit
>> misleading: the important questions at the moment, AFAICT, are:
>
> I explained them in original proposal

Yes, I read it and didn't understand it there either. :-)

>> 1. Whether we make holes at boot time for all RMRRs on the system, or
>> whether only make RMRRs for some subset (or potentially some other
>> arbitrary range, which may include RMRRs on other hosts to which we
>> may want to migrate).
>
> I use 'report-all' to stand for making holes for all RMRRs on the system,
> while 'report-sel' for specified subset.
>
> including other RMRRs (from admin for migration) is orthogonal to
> above open.

Right; so the "report" in this case is "report to the guest".

As I said, I think that's confusing terminology; after all, we want to
report to the guest all holes that we make, and only the holes that we
make.  The question isn't then which ones we report, but which ones we
make holes for. :-)

So for this discussion, maybe "rmrr-host" (meaning, copy RMRRs from
the host) or "rmrr-sel" (meaning, specify a selection of RMRRs, which
may be from this host, or even another host)?

Given that the ranges may be of arbitrary size, and that we may want
to specify additional ranges for migration to other hosts, then I
think we need at some level we need the machinery to be in place to
specify the RMRRs that will be reserved for a specific guest.

At the xl level, there should of course be a way to specify "use all
host RMRRs"; but what should happen then is that xl / libxl should
query Xen for the host RMRRs and then pass those down to the next
layer of the library.

>> 2. Whether those holes are made by the domain builder in libxc, or by
>> hvmloader
>
> based on current discussion, whether to make holes in hvmloader
> doesn't bring fundamental difference. as long as domain builder
> still need to populate memory (even minimal for hvmloader to boot),
> it needs to check conflict and may ideally make hole too (though we
> may make assumption not doing that)

Well it will have an impact on the overall design of the code; but
you're right, if RMRRs really can (and will) be anywhere in memory,
then the domain builder will need to know what RMRRs are going to be
reserved for this VM and avoid populating those.  If, on the other
hand, we can make some fairly small assumptions about where there will
not be any RMRRs, then we can get away with handling everything in
hvmloader.

>>
>> 3. What happens if Xen is asked to assign a device and it finds that
>> the required RMRR is not empty:
>>  a. during guest creation
>>  b. after the guest has booted
>
> for Xen we don't need differentiate

Re: [Xen-devel] (v2) Design proposal for RMRR fix

2015-01-12 Thread George Dunlap
On Mon, Jan 12, 2015 at 1:56 PM, Pasi Kärkkäinen  wrote:
> On Mon, Jan 12, 2015 at 11:25:56AM +, George Dunlap wrote:
>> So qemu-traditional didn't particularly expect to know the guest
>> memory layout.  qemu-upstream does; it expects to know what areas of
>> memory are guest memory and what areas of memory are unmapped.  If a
>> read or write happens to a gpfn which *xen* knows is valid, but which
>> *qemu-upstream* thinks is unmapped, then qemu-upstream will crash.
>>
>> The problem though is that the guest's memory map is not actually
>> communicated to qemu-upstream in any way.  Originally, qemu-upstream
>> was only told how much memory the guest had, and it just "happens" to
>> choose the same guest memory layout as the libxc domain builder does.
>> This works, but it is bad design, because if libxc were to change for
>> some reason, people would have to simply remember to also change the
>> qemu-upstream layout.
>>
>> Where this really bites us is in PCI pass-through.  The default <4G
>> MMIO hole is very small; and hvmloader naturally expects to be able to
>> make this area larger by relocating memory from below 4G to above 4G.
>> It moves the memory in Xen's p2m, but it has no way of communicating
>> this to qemu-upstream.  So when the guest does an MMIO instuction that
>> causes qemu-upstream to access that memory, the guest crashes.
>>
>> There are two work-arounds at the moment:
>> 1. A flag which tells hvmloader not to relocate memory
>> 2. The option to tell qemu-upstream to make the memory hole larger.
>>
>> Both are just work-arounds though; a "proper fix" would be to allow
>> hvmloader some way of telling qemu that the memory has moved, so it
>> can update its memory map.
>>
>> This will (I'm pretty sure) have an effect on RMRR regions as well,
>> for the reasons I've mentioned above: whether make the "holes" for the
>> RMRRs in libxc or in hvmloader, if we *move* that memory up to the top
>> of the address space (rather than, say, just not giving that RAM to
>> the guest), then qemu-upstream's idea of the guest memory map will be
>> wrong, and will probably crash at some point.
>>
>> Having the ability for hvmloader to populate and/or move the memory
>> around, and then tell qemu-upstream what the resulting map looked
>> like, would fix both the MMIO-resize issue and the RMRR problem, wrt
>> qemu-upstream.
>>
>
> Hmm, wasn't this changed slightly during Xen 4.5 development by Don Slutz?
>
> You can now specify the mmio_hole size for HVM guests when using 
> qemu-upstream:
> http://wiki.xenproject.org/wiki/Xen_Project_4.5_Feature_List
>
>
> "Bigger PCI MMIO hole in QEMU via the mmio_hole parameter in guest config, 
> which allows configuring the MMIO size below 4GB. "
>
> "Backport pc & q35: Add new machine opt max-ram-below-4g":
> http://xenbits.xen.org/gitweb/?p=qemu-upstream-unstable.git;a=commit;h=ffdacad07002e14a8072ae28086a57452e48d458
>
> "x86: hvm: Allow configuration of the size of the mmio_hole.":
> http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=2d927fc41b8e130b3b8910e4442d469d2ac7

Yes -- that's workaround #2 above ("tell qemu-upstream to make the
memory hole larger").  But it's still a work-around, because it
requires the admin to figure out how big a memory hole he needs.  With
qemu-traditional, he could just assign whatever devices he wanted, and
hvmloader would make it the right size automatically.  Ideally that's
how it would work for qemu-upstream as well.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen-pt: Fix PCI devices re-attach failed

2015-01-12 Thread Paolo Bonzini


On 12/01/2015 14:35, Li, Liang Z wrote:
> 
> diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357..f2893b2 100644
> --- a/hw/xen/xen_pt.c
> +++ b/hw/xen/xen_pt.c
> @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d)
>  }
>  
>  out:
> -memory_listener_register(&s->memory_listener, &address_space_memory);
> +memory_listener_register(&s->memory_listener, 
> + &s->dev.bus_master_as);
>  memory_listener_register(&s->io_listener, &address_space_io);
>  XEN_PT_LOG(d,
> "Real physical device %02x:%02x.%d registered 
> successfully!\n",
> 
> By further debugging, I found when using 'address_space_memory',  
> 'xen_pt_region_del' 
> won't be called when the memory region's name is not  ' xen-pci-pt-*', when 
> using
>  ' s->dev.bus_master_as ', there is no such issue.
> 
> I think use the device related address space here is more reasonable, but I 
> am not sure.
>  Could you give some suggestion?

Yes, this patch makes sense.  The listener will be called every time the
command register is written.

Paolo

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [libvirt] [PATCH 09/12] Revert "src/xenconfig: Xen-xl parser"

2015-01-12 Thread John Ferlan


On 01/10/2015 12:03 AM, Jim Fehlig wrote:
> This reverts commit 2c78051a14acfb7aba078d569b1632dfe0ca0853.
> 
> Conflicts:
>   src/Makefile.am
> 
> Signed-off-by: Jim Fehlig 
> ---
>  .gitignore|   1 -
>  cfg.mk|   3 +-
>  configure.ac  |   1 -
>  po/POTFILES.in|   1 -
>  src/Makefile.am   |  25 +--
>  src/libvirt_xenconfig.syms|   4 -
>  src/xenconfig/xen_common.c|   3 +-
>  src/xenconfig/xen_xl.c| 499 
> --
>  src/xenconfig/xen_xl.h|  33 ---
>  src/xenconfig/xen_xl_disk.l   | 256 --
>  src/xenconfig/xen_xl_disk_i.h |  39 
>  11 files changed, 4 insertions(+), 861 deletions(-)
> 

OK - so reverting is fine; however, xen_xl_disk.{c,h} still exist...
Simple enough solution, but they will show up in someone's git status
output since they are also removed from .gitignore.

There are a couple Coverity issues from the next 3 patches - I'll note
them for those directly.

ACK

John


> diff --git a/.gitignore b/.gitignore
> index eac2203..9d09709 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -140,7 +140,6 @@
>  /src/remote/*_protocol.[ch]
>  /src/rpc/virkeepaliveprotocol.[ch]
>  /src/rpc/virnetprotocol.[ch]
> -/src/xenconfig/xen_xl_disk.[ch]
>  /src/test_libvirt*.aug
>  /src/test_virtlockd.aug
>  /src/util/virkeymaps.h
> diff --git a/cfg.mk b/cfg.mk
> index 3df3dcb..21f83c3 100644
> --- a/cfg.mk
> +++ b/cfg.mk
> @@ -89,9 +89,8 @@ distdir: sc_vulnerable_makefile_CVE-2012-3386.z
>  endif
>  
>  # Files that should never cause syntax check failures.
> -#  (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$
>  VC_LIST_ALWAYS_EXCLUDE_REGEX = \
> -  
> (^(HACKING|docs/(news\.html\.in|.*\.patch)|src/xenconfig/xen_xl_disk.[chl])|\.(po|fig|gif|ico|png))$$
> +  (^(HACKING|docs/(news\.html\.in|.*\.patch))|\.(po|fig|gif|ico|png))$$
>  
>  # Functions like free() that are no-ops on NULL arguments.
>  useless_free_options =   \
> diff --git a/configure.ac b/configure.ac
> index 167b875..9d12079 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -146,7 +146,6 @@ m4_ifndef([LT_INIT], [
>  ])
>  AM_PROG_CC_C_O
>  AM_PROG_LD
> -AM_PROG_LEX
>  
>  AC_MSG_CHECKING([for how to mark DSO non-deletable at runtime])
>  LIBVIRT_NODELETE=
> diff --git a/po/POTFILES.in b/po/POTFILES.in
> index 094c8e3..e7cb2cc 100644
> --- a/po/POTFILES.in
> +++ b/po/POTFILES.in
> @@ -247,7 +247,6 @@ src/xenapi/xenapi_driver.c
>  src/xenapi/xenapi_utils.c
>  src/xenconfig/xen_common.c
>  src/xenconfig/xen_sxpr.c
> -src/xenconfig/xen_xl.c
>  src/xenconfig/xen_xm.c
>  tests/virpolkittest.c
>  tools/libvirt-guests.sh.in
> diff --git a/src/Makefile.am b/src/Makefile.am
> index c7975e5..e0e47d0 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -1000,22 +1000,11 @@ CPU_SOURCES = 
> \
>  VMX_SOURCES =\
>   vmx/vmx.c vmx/vmx.h
>  
> -AM_LFLAGS = -Pxl_disk_ --header-file=../$*.h
> -LEX_OUTPUT_ROOT = lex.xl_disk_
> -BUILT_SOURCES += xenconfig/xen_xl_disk.c xenconfig/xen_xl_disk.h
> -# Generated header file is not implicitly added to dist
> -EXTRA_DIST += xenconfig/xen_xl_disk.h
> -CLEANFILES += xenconfig/xen_xl_disk.h xenconfig/xen_xl_disk.c
> -
> -XENXLDISKPARSER_SOURCES = xenconfig/xen_xl_disk.l
> -
>  XENCONFIG_SOURCES =  \
>   xenconfig/xenxs_private.h   \
> - xenconfig/xen_common.c xenconfig/xen_common.h   \
> + xenconfig/xen_common.c xenconfig/xen_common.h   \
>   xenconfig/xen_sxpr.c xenconfig/xen_sxpr.h   \
> - xenconfig/xen_xm.c xenconfig/xen_xm.h   \
> - xenconfig/xen_xl.c xenconfig/xen_xl.h   \
> - xenconfig/xen_xl_disk_i.h
> + xenconfig/xen_xm.c xenconfig/xen_xm.h
>  
>  pkgdata_DATA =   cpu/cpu_map.xml
>  
> @@ -1070,19 +1059,10 @@ libvirt_vmx_la_SOURCES = $(VMX_SOURCES)
>  endif WITH_VMX
>  
>  if WITH_XENCONFIG
> -# Flex generated XL disk parser needs to be compiled without WARN_FLAGS
> -# Add the generated object to its own library to control CFLAGS
> -noinst_LTLIBRARIES += libvirt_xenxldiskparser.la
> -libvirt_xenxldiskparser_la_CFLAGS = \
> - -I$(srcdir)/conf $(AM_CFLAGS) -Wno-unused-parameter
> -libvirt_xenxldiskparser_la_SOURCES = \
> - $(XENXLDISKPARSER_SOURCES)
> -
>  noinst_LTLIBRARIES += libvirt_xenconfig.la
>  libvirt_la_BUILT_LIBADD += libvirt_xenconfig.la
>  libvirt_xenconfig_la_CFLAGS = \
>   -I$(srcdir)/conf $(AM_CFLAGS)
> -libvirt_xenconfig_la_LIBADD = libvirt_xenxldiskparser.la
>  libvirt_xenconfig_la_SOURCES = $(XENCONFIG_SOURCES)
>  endif WITH_XENCONFIG
>  
> @@ -1844,7 +1824,6 @@ EXTRA_DIST +=   
> \
>   $(VBOX_DRIVER

Re: [Xen-devel] [PATCH v2 1/3] x86: also allow REP STOS emulation acceleration

2015-01-12 Thread George Dunlap
On Fri, Jan 9, 2015 at 12:10 PM, Jan Beulich  wrote:
 On 09.01.15 at 12:45,  wrote:
>> At 11:24 + on 09 Jan (1420799087), Jan Beulich wrote:
>>> >>> On 09.01.15 at 12:18,  wrote:
>>> >> > +default:
>>> >> > +xfree(buf);
>>> >> > +ASSERT(!buf);
>>> >
>>> > looks dodgy...
>>>
>>> In which way? The "default" is supposed to be unreachable, and sits
>>> in the else branch to an if(!buf), i.e. in a release build we'll correctly
>>> free the buffer, while in a debug build the ASSERT() will trigger.
>>
>> Oh I see.  Can you please use ASSERT(0) for that?
>
> I sincerely dislike ASSERT(0), but if that's the only way to get
> the patch accepted...

Um, by what set of criteria is ASSERT(!buf) better than ASSERT(0)?  At
least the second tells the reader that you're not using ASSERT() the
normal way.

I agree that ASSERT_UNREACHABLE() is probably the best option.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [Xen-users] [TestDay] minor bug + possible configuration bug 4.5rc4 archlinux

2015-01-12 Thread Ian Campbell
Adding some CC's, including the devel list.

On Fri, 2015-01-09 at 19:49 -0600, Doug McMillan wrote:
> 
> 
> configuration(?)
> I compiled booted straight from bios with xen.efi during boot I received 
> several errors.
> xl info works (see attachment). XL list locks the terminal session. Checking 
> into the errors I 
> found the following under dmesg (full attached):
> 
> 
>   Ignoring BGRT: invalid status 0 (expected 1) 
>   ACPI: SCI (ACPI GSI 9) not registered
>   kvm: no hardware support
>   mce: Unable to init device /dev/mcelog (rc: -16)
> 
> 
>  systemd[1]: Set hostname to .
>  systemd[1]: var-lib-xenstored.mount's Where= setting doesn't match unit 
> name. Refusing.

I suppose this error message is accurate and the Where field is not
"/var/lib/xenstored"? What is it actually? What options did you pass
to ./configure when building Xen?

This seems like a silly misfeature of systemd to me, but I suppose the
fix is to rename the thing to match, except that would require all the
dependent units to have the field changed too. It might turn out to be
easier to instead arrange for @XEN_LIB_STORED@ to == /var/lib/xenstored.

@devs -- we obviously need to do something about this (too late for 4.5,
but for 4.6 + backport). Perhaps there is some alternative systemd
construction which disassociates the actual path from the abstract
service "xenstored dir mounted"?

Otherwise we'll have to somehow arrange for the file and everything
which depends on it to have a name which reflects the actual mount path,
which sounds pretty awful to me...

Ian.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2] [Bugfix] x86/apic: Fix xen IRQ allocation failure caused by commit b81975eade8c

2015-01-12 Thread David Vrabel
On 12/01/15 13:39, Jiang Liu wrote:
> Commit b81975eade8c ("x86, irq: Clean up irqdomain transition code")
> breaks xen IRQ allocation because xen_smp_prepare_cpus() doesn't invoke
> setup_IO_APIC(), so no irqdomains created for IOAPICs and
> mp_map_pin_to_irq() fails at the very beginning.
> 
> Enhance xen_smp_prepare_cpus() to call setup_IO_APIC() to initialize
> irqdomain for IOAPICs.

Having Xen call setup_IO_APIC() to initialize the irq domains then having to
add special cases to it is just wrong.

The bits of init deferred by mp_register_apic() are also deferred to
two different places which looks odd.

What about something like the following (untested) patch?

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3f5f604..e180680 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -253,8 +253,10 @@ int __init arch_early_ioapic_init(void)
if (!nr_legacy_irqs())
io_apic_irqs = ~0UL;
 
-   for_each_ioapic(i)
+   for_each_ioapic(i) {
+   BUG_ON(mp_irqdomain_create(ioapic));
alloc_ioapic_saved_registers(i);
+   }
 
/*
 * For legacy IRQ's, start with assigning irq0 to irq15 to
@@ -2379,8 +2381,6 @@ void __init setup_IO_APIC(void)
io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
 
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-   for_each_ioapic(ioapic)
-   BUG_ON(mp_irqdomain_create(ioapic));
 
/*
  * Set up IO-APIC IRQ routing.
@@ -2929,7 +2929,8 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
/*
 * If mp_register_ioapic() is called during early boot stage when
 * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
-* we are still using bootmem allocator. So delay it to setup_IO_APIC().
+* we are still using bootmem allocator. So delay it to
+* arch_early_ioapic_init().
 */
if (hotplug) {
if (mp_irqdomain_create(idx)) {

> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -2369,6 +2381,15 @@ static void ioapic_destroy_irqdomain(int idx)
>   ioapics[idx].pin_info = NULL;
>  }
>  
> +static void setup_IO_APIC_IDs(void)
> +{
> + if (xen_domain())
> + return;

This would have to xen_pv_domain().

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [seabios test] 33359: regressions - FAIL

2015-01-12 Thread xen . org
flight 33359 seabios real [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/33359/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-qemuu-debianhvm-amd64  5 xen-boot fail REGR. vs. 33317

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-i386-libvirt   9 guest-start  fail   never pass
 test-amd64-amd64-libvirt  9 guest-start  fail   never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-start fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass

version targeted for testing:
 seabios  301dd092c2d04a5d70c94b9d873d810785e94a84
baseline version:
 seabios  60e0e55f212dadd043ab9e39bee05a48013ddd8f


People who touched revisions under test:
  Kevin O'Connor 


jobs:
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-pvh-amd  fail
 test-amd64-i386-rhel6hvm-amd pass
 test-amd64-i386-qemut-rhel6hvm-amd   pass
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64pass
 test-amd64-i386-xl-qemut-debianhvm-amd64 pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64fail
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass
 test-amd64-amd64-xl-qemut-win7-amd64 fail
 test-amd64-i386-xl-qemut-win7-amd64  fail
 test-amd64-amd64-xl-qemuu-win7-amd64 fail
 test-amd64-i386-xl-qemuu-win7-amd64  fail
 test-amd64-amd64-xl-win7-amd64   fail
 test-amd64-i386-xl-win7-amd64fail
 test-amd64-i386-xl-credit2   pass
 test-amd64-i386-freebsd10-i386   pass
 test-amd64-amd64-xl-pcipt-intel  fail
 test-amd64-amd64-xl-pvh-intelfail
 test-amd64-i386-rhel6hvm-intel   pass
 test-amd64-i386-qemut-rhel6hvm-intel pass
 test-amd64-i386-qemuu-rhel6hvm-intel pass
 test-amd64-amd64-libvirt fail
 test-amd64-i386-libvirt  fail
 test-amd64-i386-xl-multivcpu pass
 test-amd64-amd64-pairpass
 test-amd64-i386-pair pass
 test-amd64-amd64-xl-sedf-pin pass
 test-amd64-

Re: [Xen-devel] [PATCH 00/12] Replace Xen xl parsing/formatting impl

2015-01-12 Thread Ian Campbell
On Fri, 2015-01-09 at 22:03 -0700, Jim Fehlig wrote:
> The first attempt to implement support for parsing/formatting Xen's
> xl disk config format copied Xen's flex-based parser into libvirt, which
> has proved to be challenging in the context of autotools.  But as it turns
> out, Xen provides an interface to the parser via libxlutil.
> 
> This series reverts the first attempt, along with subsequent attempts to
> fix it, and replaces it with an implementation based on libxlutil.  The
> first nine patches revert the original implementation and subsequent fixes.
> Patch 10 provides an implemenation based on libxlutil.  Patches 11 and
> 12 are basically unchanged from patches 3 and 4 in the first attempt.
> 
> One upshot of using libxlutil instead of copying the flex source is
> removing the potential for source divergence.

Thanks for doing this, looks good to me, FWIW.

Is the presence/absence of xen-xl support exposed via virsh anywhere? If
so then I can arrange for my Xen osstest patches for libvirt testing to
use xen-xl when available but still fallback to xen-xm. I've had a look
in "virsh capabilities" and "virsh help domxml-from-native" but not
seeing xen-xm, so assuming xen-xl won't magically appear in any of those
places either.

(TBH, this may become moot since I suspect your patches will be well
established by the time my osstest patches hit osstest...)

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/blkfront: restart request queue when there is enough persistent_gnts_c

2015-01-12 Thread Roger Pau Monné
El 12/01/15 a les 14.04, David Vrabel ha escrit:
> On 12/01/15 11:36, Roger Pau Monné wrote:
>> El 12/01/15 a les 8.09, Bob Liu ha escrit:
>>>
>>> On 01/09/2015 11:51 PM, Roger Pau Monné wrote:
 El 06/01/15 a les 14.19, Bob Liu ha escrit:
> When there is no enough free grants, gnttab_alloc_grant_references()
> will fail and block request queue will stop.
> If the system is always lack of grants, blkif_restart_queue_callback() 
> can't be
> scheduled and block request queue can't be restart(block I/O hang).
>
> But when there are former requests complete, some grants may free to
> persistent_gnts_c, we can give the request queue another chance to 
> restart and
> avoid block hang.
>
> Reported-by: Junxiao Bi 
> Signed-off-by: Bob Liu 
> ---
>  drivers/block/xen-blkfront.c |   11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 2236c6f..dd30f99 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -1125,6 +1125,17 @@ static void blkif_completion(struct blk_shadow *s, 
> struct blkfront_info *info,
>   }
>   }
>   }
> +
> + /*
> +  * Request queue would be stopped if failed to alloc enough grants and
> +  * won't be restarted until gnttab_free_count >= info->callback->count.
> +  *
> +  * But there is another case, once we have enough persistent grants we
> +  * can try to restart the request queue instead of continue to wait for
> +  * 'gnttab_free_count'.
> +  */
> + if (info->persistent_gnts_c >= info->callback.count)
> + schedule_work(&info->work);

 I guess I'm missing something here, but blkif_completion is called by
 blkif_interrupt, which in turn calls kick_pending_request_queues when
 finished, which IMHO should be enough to restart the processing of 
 requests.

>>>
>>> You are right, sorry for the mistake.
>>>
>>> The problem we met was a xenblock I/O hang.
>>> Dumped data showed at that time info->persistent_gnt_c = 8, max_gref = 8
>>> but block request queue was still stopped.
>>> It's very hard to reproduce this issue, we only see it once.
>>>
>>> I think there might be a race condition:
>>>
>>> request A  request B:
>>>
>>>info->persistent_gnts_c < max_grefs
>>>and fail to alloc enough grants
>>>
>>>
>>> 
>>> interrupt happen, blkif_complte():
>>> info->persistent_gnts_c++
>>> kick_pending_request_queues()

blkif_interrupt can never interrupt blkif_queue_request, because it's
holding a spinlock (info->io_lock). If you have seen this trace in the
wild it means something is really wrong and we are calling
blkif_queue_request without acquiring the spinlock and thus without
disabling interrupts.

>>>
>>> stop block request queue
>>> added to callback list
>>>
>>> If the system don't have enough grants(but have enough persistent_gnts),
>>> request queue would still hang.
>>
>> Not sure how can this happen, blkif_queue_request explicitly checks that
>> persistent_gnts_c < max_grefs before adding the callback and stopping
>> the request queue, so in your case the queue should not be blocked. Can
>> you dump the state of info->connected?
> 
> I think Bob has correctly identified a race.
> 
> After calling blk_stop_queue(), check info->persistent_gnts again and
> restart the queue if there free grefs.
> 
> David
> 
> 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [xen-4.5-testing test] 33348: tolerable FAIL - PUSHED

2015-01-12 Thread xen . org
flight 33348 xen-4.5-testing real [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/33348/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-pvh-intel  9 guest-start  fail  never pass
 test-amd64-amd64-xl-pvh-amd   9 guest-start  fail   never pass
 test-armhf-armhf-libvirt  9 guest-start  fail   never pass
 test-armhf-armhf-xl  10 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt  9 guest-start  fail   never pass
 test-amd64-amd64-xl-pcipt-intel  9 guest-start fail never pass
 test-amd64-i386-libvirt   9 guest-start  fail   never pass
 test-amd64-i386-xl-winxpsp3  14 guest-stop   fail   never pass
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3 14 guest-stopfail never pass
 test-amd64-i386-xl-winxpsp3-vcpus1 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemut-win7-amd64 14 guest-stop fail never pass
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 14 guest-stop fail never pass
 test-amd64-i386-xl-win7-amd64 14 guest-stop   fail  never pass
 test-amd64-i386-xl-qemuu-win7-amd64 14 guest-stop  fail never pass
 test-amd64-amd64-xl-win7-amd64 14 guest-stop   fail never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 14 guest-stop fail never pass
 test-amd64-amd64-xl-qemut-winxpsp3 14 guest-stop   fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 14 guest-stop  fail never pass
 test-amd64-i386-pair17 guest-migrate/src_host/dst_host fail never pass
 test-amd64-i386-xl-qemut-winxpsp3 14 guest-stopfail never pass
 test-amd64-amd64-xl-qemuu-winxpsp3 14 guest-stop   fail never pass
 test-amd64-amd64-xl-winxpsp3 14 guest-stop   fail   never pass

version targeted for testing:
 xen  3508d75155dae3154d10cd5c33d44336f6f4bf1c
baseline version:
 xen  36174af3fbeb1b662c0eadbfa193e77f68cc955b


People who touched revisions under test:
  Andrew Cooper 
  Boris Ostrovsky 
  Ed Swierk 
  Ian Campbell 
  Ian Campbell 
  Ian Jackson 
  Ian Jackson 
  Jan Beulich 
  Julien Grall 
  Kevin Tian 
  Konrad Rzeszutek Wilk 
  Mihai Donțu 
  Olaf Hering 
  Robert Hu 
  Răzvan Cojocaru 
  Stefano Stabellini 
  Wei Liu 
  Yang Hongyang 


jobs:
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 build-amd64-rumpuserxen  pass
 build-i386-rumpuserxen   pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-pvh-amd  fail
 test-amd64-i386-rhel6hvm-amd pass
 test-amd64-i386-qemut-rhel6hvm-amd   pass
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64pass
 test-amd64-i386-xl-qemut-debianhvm-amd64 pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass
 test-amd64-amd64-rumpuserxen-amd64   pass
 test-amd64-amd64-xl-qemut-win7-amd64 fail
 test-amd64-i386-xl-qemut-win7-amd64  fail
 test-amd64-amd64-xl-qemuu-win7-amd64 fail
 test-amd64-i386-xl-qemuu-win7-amd64  fail
 test-amd64-amd64-xl-win7-amd64   fail
 test-amd64-i386-xl-win7-amd64fail
 test-amd64-i386-xl-credit2   pass
 test-amd64-i386-freebsd10-i386   pass
 test-amd64-i386-rumpuserxen-i386

[Xen-devel] [PATCH v2] xenstored: log tdb message via xenstored's logging mechanisms

2015-01-12 Thread Ian Campbell
TDB provides us with a callback for this purpose. Use it in both
xenstored and xs_tdb_dump.

While at it make the existing log() macro tollerate memory failures.

Signed-off-by: Ian Campbell 
---
v2: Use &tdb_logger consistently.
Did not: move location of talloc_free, since talloc_free(NULL)
returns an error.
---
 tools/xenstore/xenstored_core.c |   39 +--
 tools/xenstore/xs_tdb_dump.c|   12 +++-
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 4eaff57..3fd9a20 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -89,9 +89,14 @@ static void check_store(void);
 #define log(...)   \
do {\
char *s = talloc_asprintf(NULL, __VA_ARGS__);   \
-   trace("%s\n", s);   \
-   syslog(LOG_ERR, "%s",  s);  \
-   talloc_free(s); \
+   if (s) {\
+   trace("%s\n", s);   \
+   syslog(LOG_ERR, "%s",  s);  \
+   talloc_free(s); \
+   } else {\
+   trace("talloc failure during logging\n");   \
+   syslog(LOG_ERR, "talloc failure during logging\n"); \
+   }   \
} while (0)
 
 
@@ -1479,13 +1484,35 @@ static void manual_node(const char *name, const char 
*child)
talloc_free(node);
 }
 
+static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...)
+{
+   va_list ap;
+   char *s;
+
+   va_start(ap, fmt);
+   s = talloc_vasprintf(NULL, fmt, ap);
+   va_end(ap);
+
+   if (s) {
+   trace("TDB: %s\n", s);
+   syslog(LOG_ERR, "TDB: %s",  s);
+   if (verbose)
+   xprintf("TDB: %s", s);
+   talloc_free(s);
+   } else {
+   trace("talloc failure during logging\n");
+   syslog(LOG_ERR, "talloc failure during logging\n");
+   }
+}
+
 static void setup_structure(void)
 {
char *tdbname;
tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
 
if (!(tdb_flags & TDB_INTERNAL))
-   tdb_ctx = tdb_open(tdbname, 0, tdb_flags, O_RDWR, 0);
+   tdb_ctx = tdb_open_ex(tdbname, 0, tdb_flags, O_RDWR, 0,
+ &tdb_logger, NULL);
 
if (tdb_ctx) {
/* XXX When we make xenstored able to restart, this will have
@@ -1516,8 +1543,8 @@ static void setup_structure(void)
talloc_free(tlocal);
}
else {
-   tdb_ctx = tdb_open(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT,
-  0640);
+   tdb_ctx = tdb_open_ex(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT,
+ 0640, &tdb_logger, NULL);
if (!tdb_ctx)
barf_perror("Could not create tdb file %s", tdbname);
 
diff --git a/tools/xenstore/xs_tdb_dump.c b/tools/xenstore/xs_tdb_dump.c
index b91cdef..9f636f9 100644
--- a/tools/xenstore/xs_tdb_dump.c
+++ b/tools/xenstore/xs_tdb_dump.c
@@ -33,6 +33,15 @@ static char perm_to_char(enum xs_perm_type perm)
'?';
 }
 
+static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...)
+{
+   va_list ap;
+
+   va_start(ap, fmt);
+   vfprintf(stderr, fmt, ap);
+   va_end(ap);
+}
+
 int main(int argc, char *argv[])
 {
TDB_DATA key;
@@ -41,7 +50,8 @@ int main(int argc, char *argv[])
if (argc != 2)
barf("Usage: xs_tdb_dump ");
 
-   tdb = tdb_open(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0);
+   tdb = tdb_open_ex(talloc_strdup(NULL, argv[1]), 0, 0, O_RDONLY, 0,
+ &tdb_logger, NULL);
if (!tdb)
barf_perror("Could not open %s", argv[1]);
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] Add a flight to test OVMF master branch

2015-01-12 Thread Wei Liu
Do the usual stuffs for adding a new branch (ap-* cr-* etc).

Modify ts-xen-build so that it builds Xen with the specified ovmf tree
and revision.

Only build and test on x86 by modifying make-flight and mfi-common.

New branch is added to cr-daily-branch. It exports the tree and
changeset used for the test if set. Otherwise the values in Config.mk
are used.

Signed-off-by: Wei Liu 
---
 ap-common|5 +
 ap-fetch-version |4 
 ap-fetch-version-old |5 +
 ap-print-url |3 +++
 ap-push  |5 +
 cr-daily-branch  |   14 ++
 cr-for-branches  |2 +-
 cri-common   |1 +
 make-flight  |6 ++
 mfi-common   |5 -
 ts-xen-build |7 +++
 11 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/ap-common b/ap-common
index 96cbd14..e2c3e42 100644
--- a/ap-common
+++ b/ap-common
@@ -51,6 +51,10 @@
 : ${PUSH_TREE_SEABIOS:=$XENBITS:/home/xen/git/osstest/seabios.git}
 : ${BASE_TREE_SEABIOS:=git://xenbits.xen.org/osstest/seabios.git}
 
+: ${TREE_OVMF_UPSTREAM:=https://github.com/tianocore/edk2.git}
+: ${PUSH_TREE_OVMF:=$XENBITS:/home/xen/git/osstest/ovmf.git}
+: ${BASE_TREE_OVMF:=git://xenbits.xen.org/osstest/ovmf.git}
+
 : ${TREE_LINUXFIRMWARE:=git://xenbits.xen.org/osstest/linux-firmware.git}
 : ${PUSH_TREE_LINUXFIRMWARE:=$XENBITS:/home/osstest/ext/linux-firmware.git}
 : 
${UPSTREAM_TREE_LINUXFIRMWARE:=$GIT_KERNEL_ORG/pub/scm/linux/kernel/git/firmware/linux-firmware.git}
@@ -77,6 +81,7 @@ fi
 : ${LOCALREV_LIBVIRT:=daily-cron.$branch}
 : ${LOCALREV_RUMPUSERXEN:=daily-cron.$branch}
 : ${LOCALREV_SEABIOS:=daily-cron.$branch}
+: ${LOCALREV_OVMF:=daily-cron.$branch}
 
 : ${TREEBASE_LINUX_XCP:=http://hg.uk.xensource.com/carbon/trunk/linux-2.6.27}
 
diff --git a/ap-fetch-version b/ap-fetch-version
index f6c65d8..33aaf00 100755
--- a/ap-fetch-version
+++ b/ap-fetch-version
@@ -85,6 +85,10 @@ seabios)
repo_tree_rev_fetch_git seabios \
$TREE_SEABIOS_UPSTREAM master $LOCALREV_SEABIOS
;;
+ovmf)
+   repo_tree_rev_fetch_git ovmf \
+   $TREE_OVMF_UPSTREAM master $LOCALREV_OVMF
+   ;;
 osstest)
if [ "x$OSSTEST_USE_HEAD" != "xy" ] ; then
git fetch $HOME/testing.git pretest:ap-fetch >&2
diff --git a/ap-fetch-version-old b/ap-fetch-version-old
index 43c997c..7b7f50e 100755
--- a/ap-fetch-version-old
+++ b/ap-fetch-version-old
@@ -30,6 +30,7 @@ select_xenbranch
 : ${BASE_LOCALREV_LIBVIRT:=daily-cron.$branch.old}
 : ${BASE_LOCALREV_RUMPUSERXEN:=daily-cron.$branch.old}
 : ${BASE_LOCALREV_SEABIOS:=daily-cron.$branch.old}
+: ${BASE_LOCALREV_OVMF:=daily-cron.$branch.old}
 
 : ${BASE_TREE_QEMU_UPSTREAM:=${TREE_QEMU_UPSTREAM/\/staging\//\/}}
 
@@ -92,6 +93,10 @@ seabios)
repo_tree_rev_fetch_git seabios \
$BASE_TREE_SEABIOS xen-tested-master $BASE_LOCALREV_SEABIOS
;;
+ovmf)
+   repo_tree_rev_fetch_git ovmf \
+   $BASE_TREE_OVMF xen-tested-master $BASE_LOCALREV_OVMF
+   ;;
 osstest)
if [ "x$OSSTEST_USE_HEAD" != "xy" ] ; then
git fetch -f $HOME/testing.git incoming:ap-fetch
diff --git a/ap-print-url b/ap-print-url
index 7b27e1e..3db2375 100755
--- a/ap-print-url
+++ b/ap-print-url
@@ -58,6 +58,9 @@ rumpuserxen)
 seabios)
echo $TREE_SEABIOS_UPSTREAM
;;
+ovmf)
+   echo $TREE_OVMF_UPSTREAM
+   ;;
 osstest)
echo none:;
;;
diff --git a/ap-push b/ap-push
index a2aa747..01089f3 100755
--- a/ap-push
+++ b/ap-push
@@ -36,6 +36,7 @@ TREE_XEN=$PUSH_TREE_XEN
 TREE_LIBVIRT=$PUSH_TREE_LIBVIRT
 TREE_RUMPUSERXEN=$PUSH_TREE_RUMPUSERXEN
 TREE_SEABIOS=$PUSH_TREE_SEABIOS
+TREE_OVMF=$PUSH_TREE_OVMF
 
 if info_linux_tree "$branch"; then
cd $repos/linux
@@ -92,6 +93,10 @@ seabios)
cd $repos/seabios
git push $TREE_SEABIOS $revision:refs/heads/xen-tested-master
;;
+ovmf)
+   cd $repos/ovmf
+   git push $TREE_OVMF $revision:refs/heads/xen-tested-master
+   ;;
 osstest)
git push $HOME/testing.git $revision:incoming
git push $XENBITS:/home/xen/git/osstest.git $revision:master
diff --git a/cr-daily-branch b/cr-daily-branch
index fc663ce..c7d1071 100755
--- a/cr-daily-branch
+++ b/cr-daily-branch
@@ -146,6 +146,16 @@ if [ "x$REVISION_SEABIOS" = x ]; then
: REVISION_SEABIOS from Config.mk
 fi
 fi
+if [ "x$REVISION_OVMF" = x ]; then
+if [ "x$tree" = "xovmf" ]; then
+   TREE_OVMF=$TREE_OVMF_UPSTREAM
+   export TREE_OVMF
+   determine_version REVISION_OVMF ovmf OVMF
+   export REVISION_OVMF
+else
+   : REVISION_OVMF from Config.mk
+fi
+fi
 if [ "x$REVISION_LIBVIRT" = x ]; then
determine_version REVISION_LIBVIRT libvirt LIBVIRT
export REVISION_LIBVIRT
@@ -203,6 +213,10 @@ seabios)
realtree=seabios
NEW_REVISION=$REVISION_SEABIOS
;;
+ovmf)
+   realtree=ovmf
+   NEW_REVISION=$REVISION_OVMF
+   ;;
 *)
  

  1   2   3   >