On Fri, Dec 09, 2016 at 09:48:32AM -0700, Jan Beulich wrote:
> >>> On 30.11.16 at 17:49, <roger....@citrix.com> wrote:
> > @@ -302,7 +307,8 @@ static unsigned long __init compute_dom0_nr_pages(
> >          avail -= max_pdx >> s;
> >      }
> >  
> > -    need_paging = opt_dom0_shadow || (is_pvh_domain(d) && !iommu_hap_pt_share);
> > +    need_paging = opt_dom0_shadow || (has_hvm_container_domain(d) &&
> > +                  (!iommu_hap_pt_share || !paging_mode_hap(d)));
> 
> Indentation.

Fixed.

> > @@ -545,11 +552,12 @@ static __init void pvh_map_all_iomem(struct domain *d, unsigned long nr_pages)
> >          ASSERT(nr_holes == 0);
> >  }
> >  
> > -static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
> > +static __init void hvm_setup_e820(struct domain *d, unsigned long nr_pages)
> 
> Why?

So that afterwards I can remove all the pvh_ functions and leave only
the hvm_ ones. But seeing your response to the other patch, would you
prefer me to just use pvh_ for the new functions also?

> > @@ -577,8 +585,19 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
> >              continue;
> >          }
> >  
> > -        *entry_guest = *entry;
> > -        pages = PFN_UP(entry_guest->size);
> > +        /*
> > +         * Make sure the start and length are aligned to PAGE_SIZE, because
> > +         * that's the minimum granularity of the 2nd stage translation.
> > +         */
> > +        start = ROUNDUP(entry->addr, PAGE_SIZE);
> > +        end = (entry->addr + entry->size) & PAGE_MASK;
> 
> Taking the comment into consideration, I wonder whether you
> wouldn't better use PAGE_ORDER_4K here, as that's what the
> p2m code uses.

That's going to be more cumbersome, since PAGE_SIZE would become
1UL << PAGE_ORDER_4K << PAGE_SHIFT, and PAGE_MASK the ~ of that same
construct minus 1. But I see your point, maybe I should define
PAGE_SIZE_4K and PAGE_MASK_4K in xen/include/asm-x86/page.h?
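Just to make that concrete, a rough, untested sketch of what I have in
mind (the _4K names are only my suggestion):

/* 4K granularity of the 2nd stage translation, in p2m terms. */
#define PAGE_SIZE_4K (1UL << PAGE_ORDER_4K << PAGE_SHIFT)
#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))

The alignment above would then become:

        start = ROUNDUP(entry->addr, PAGE_SIZE_4K);
        end = (entry->addr + entry->size) & PAGE_MASK_4K;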
> > @@ -1010,8 +1029,6 @@ static int __init construct_dom0_pv(
> >      BUG_ON(d->vcpu[0] == NULL);
> >      BUG_ON(v->is_initialised);
> >  
> > -    process_pending_softirqs();
> 
> Wouldn't this adjustment better fit into the previous patch, together
> with its companion below?

Yes, I guess I must have forgotten to move this.

> > +static int __init hvm_steal_ram(struct domain *d, unsigned long size,
> > +                                paddr_t limit, paddr_t *addr)
> > +{
> > +    unsigned int i = d->arch.nr_e820;
> > +
> > +    while ( i-- )
> > +    {
> > +        struct e820entry *entry = &d->arch.e820[i];
> > +
> > +        if ( entry->type != E820_RAM || entry->size < size )
> > +            continue;
> > +
> > +        /* Subtract from the beginning. */
> > +        if ( entry->addr + size < limit && entry->addr >= MB(1) )
> 
> <= I think (for the left comparison)?

Yes.

> > +static void __init hvm_steal_low_ram(struct domain *d, unsigned long start,
> > +                                     unsigned long nr_pages)
> > +{
> > +    unsigned long mfn;
> > +
> > +    ASSERT(start + nr_pages < PFN_DOWN(MB(1)));
> 
> <= again I think.

Right.

> > +static int __init hvm_setup_p2m(struct domain *d)
> > +{
> > +    struct vcpu *v = d->vcpu[0];
> > +    unsigned long nr_pages;
> > +    unsigned int i;
> > +    int rc;
> > +    bool preempted;
> > +#define MB1_PAGES PFN_DOWN(MB(1))
> > +
> > +    nr_pages = compute_dom0_nr_pages(d, NULL, 0);
> > +
> > +    hvm_setup_e820(d, nr_pages);
> > +    do {
> > +        preempted = false;
> > +        paging_set_allocation(d, dom0_paging_pages(d, nr_pages),
> > +                              &preempted);
> > +        process_pending_softirqs();
> > +    } while ( preempted );
> > +
> > +    /*
> > +     * Memory below 1MB is identity mapped.
> > +     * NB: this only makes sense when booted from legacy BIOS.
> > +     */
> > +    rc = modify_identity_mmio(d, 0, PFN_DOWN(MB(1)), true);
> > +    if ( rc )
> > +    {
> > +        printk("Failed to identity map low 1MB: %d\n", rc);
> > +        return rc;
> > +    }
> > +
> > +    /* Populate memory map. */
> > +    for ( i = 0; i < d->arch.nr_e820; i++ )
> > +    {
> > +        unsigned long addr, size;
> > +
> > +        if ( d->arch.e820[i].type != E820_RAM )
> > +            continue;
> > +
> > +        addr = PFN_DOWN(d->arch.e820[i].addr);
> > +        size = PFN_DOWN(d->arch.e820[i].size);
> > +
> > +        if ( addr >= MB1_PAGES )
> > +            rc = hvm_populate_memory_range(d, addr, size);
> > +        else if ( addr + size > MB1_PAGES )
> > +        {
> > +            hvm_steal_low_ram(d, addr, MB1_PAGES - addr);
> > +            rc = hvm_populate_memory_range(d, MB1_PAGES,
> > +                                           size - (MB1_PAGES - addr));
> 
> Is this case possible at all? All x86 systems have some form of
> BIOS right below the 1Mb boundary, and the E820 map for
> Dom0 is being derived from the host one.

Heh, I don't think so, but I wanted to cover all possible inputs. TBH,
I have no idea how broken e820 memory maps can really be. Would you be
fine with removing this case and adding an ASSERT instead?
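Something along these lines (an untested sketch; it assumes the final
else branch, not quoted above, simply hands RAM regions fully below
1MB to hvm_steal_low_ram):

        if ( addr >= MB1_PAGES )
            rc = hvm_populate_memory_range(d, addr, size);
        else
        {
            /* RAM regions are not expected to cross the 1MB boundary. */
            ASSERT(addr + size <= MB1_PAGES);
            hvm_steal_low_ram(d, addr, size);
        }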
> > --- a/xen/arch/x86/mm.c
> > +++ b/xen/arch/x86/mm.c
> > @@ -475,6 +475,43 @@ void share_xen_page_with_guest(
> >      spin_unlock(&d->page_alloc_lock);
> >  }
> >  
> > +int unshare_xen_page_with_guest(struct page_info *page, struct domain *d)
> 
> __init
> 
> And once it's __init, it may be possible to simplify it, as you don't
> need to fear races anymore. E.g. you wouldn't need a loop over
> cmpxchg().

Indeed.

> > +{
> > +    unsigned long y, x;
> > +    bool drop_dom_ref;
> > +
> > +    if ( page_get_owner(page) != d || !(page->count_info & PGC_xen_heap) )
> 
> Please don't open code is_xen_heap_page().

Right, I'm not very knowledgeable about the mm functions yet.

> > +        return -EINVAL;
> > +
> > +    spin_lock(&d->page_alloc_lock);
> > +
> > +    /* Drop the page reference so we can change the owner. */
> > +    y = page->count_info;
> > +    do {
> > +        x = y;
> > +        if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
> > +        {
> > +            spin_unlock(&d->page_alloc_lock);
> > +            return -EINVAL;
> > +        }
> > +        y = cmpxchg(&page->count_info, x, PGC_xen_heap);
> > +    } while ( y != x );
> > +
> > +    /* Remove the page from the list of domain pages. */
> > +    page_list_del(page, &d->xenpage_list);
> > +    drop_dom_ref = (--d->xenheap_pages == 0);
> 
> Aren't you open coding
> 
>     if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
>         put_page(page);
> 
> here (except for the check on the initial value, which could be
> moved up)?

Yes, that's right.

> > +    /* Remove the owner and clear the flags. */
> > +    page_set_owner(page, NULL);
> > +    page->u.inuse.type_info = 0;
> 
> I think you'd better bail early if this is non-zero. Or else please use
> the order used elsewhere (clearing type info, then owner) - while
> it's benign, it avoids someone later wondering whether the order
> is wrong in either place.

It's certainly going to be non-zero, because share_xen_page_with_guest
sets it to:

    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
    page->u.inuse.type_info |= PGT_validated | 1;

I've ended up coding it as:

int __init unshare_xen_page_with_guest(struct page_info *page,
                                       struct domain *d)
{
    if ( page_get_owner(page) != d || !is_xen_heap_page(page) )
        return -EINVAL;

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    /* Remove the owner and clear the flags. */
    page->u.inuse.type_info = 0;
    page_set_owner(page, NULL);

    return 0;
}

(Note that put_page does indeed use a cmpxchg, but the benefits of not
open coding it far outweigh the penalty of using cmpxchg IMHO.)

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel