Allow guest pagetables to go out of sync.

Instead of write-protecting every shadowed guest pagetable, a last-level
shadow page may now be marked unsync and the guest allowed to modify it
directly.  Such pages are resynchronized lazily: kvm_mmu_get_page() syncs
a stale page via kvm_sync_page() when it is looked up again, the page
fault path syncs unsync children below the current root through the new
need_root_sync flag and kvm_mmu_sync_roots(), and kvm_mmu_zap_page() zaps
any unsync children of a page being zapped.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
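
For context, here is a minimal, self-contained toy model of the walker added
below in mmu.c (all toy_* names are invented for illustration and are not
kernel APIs); it mirrors only the descend / scan / clear-unsync_children
control flow of mmu_unsync_walk() and the callback convention used by
mmu_sync_children():

#include <stdio.h>
#include <stdbool.h>

#define TOY_ENTRIES 4

struct toy_page {
	bool unsync;			/* this page's guest pagetable went stale */
	bool unsync_children;		/* some page below this one is unsync */
	struct toy_page *child[TOY_ENTRIES];
};

typedef int (*toy_sync_fn)(struct toy_page *sp, void *priv);

/* Same shape as mmu_unsync_walk(): scan, descend, clear when a level is clean. */
static int toy_unsync_walk(struct toy_page *parent, toy_sync_fn fn, void *priv)
{
	int i, ret;
	struct toy_page *sp = parent;

	while (parent->unsync_children) {
		for (i = 0; i < TOY_ENTRIES; ++i) {
			struct toy_page *child = sp->child[i];

			if (!child)
				continue;
			if (child->unsync_children) {
				sp = child;		/* descend one level */
				break;
			}
			if (child->unsync) {
				ret = fn(child, priv);
				if (ret)
					return ret;	/* caller retries later */
			}
		}
		if (i == TOY_ENTRIES) {
			sp->unsync_children = 0;	/* level fully scanned */
			sp = parent;
		}
	}
	return 0;
}

static int toy_sync(struct toy_page *sp, void *priv)
{
	sp->unsync = false;		/* stands in for kvm_sync_page() */
	++*(int *)priv;
	return 0;
}

int main(void)
{
	struct toy_page leaf = { .unsync = true };
	struct toy_page dir  = { .unsync_children = true, .child = { &leaf } };
	struct toy_page root = { .unsync_children = true, .child = { &dir } };
	int synced = 0;

	toy_unsync_walk(&root, toy_sync, &synced);
	printf("resynced %d page(s)\n", synced);
	return 0;
}

A nonzero return from the callback stops the walk; this is how mmu_sync_fn()
lets mmu_sync_children() drop mmu_lock via cond_resched_lock() and retry.
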
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -148,6 +148,7 @@ struct kvm_shadow_walk {
};
typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page
*sp);
+typedef int (*mmu_unsync_fn) (struct kvm_mmu_page *sp, void *priv);
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
@@ -942,6 +943,39 @@ static void nonpaging_invlpg(struct kvm_
{
}
+static int mmu_unsync_walk(struct kvm_mmu_page *parent, mmu_unsync_fn fn,
+			   void *priv)
+{
+	int i, ret;
+	struct kvm_mmu_page *sp = parent;
+
+	while (parent->unsync_children) {
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+			u64 ent = sp->spt[i];
+
+			if (is_shadow_present_pte(ent)) {
+				struct kvm_mmu_page *child;
+				child = page_header(ent & PT64_BASE_ADDR_MASK);
+
+				if (child->unsync_children) {
+					sp = child;
+					break;
+				}
+				if (child->unsync) {
+					ret = fn(child, priv);
+					if (ret)
+						return ret;
+				}
+			}
+		}
+		if (i == PT64_ENT_PER_PAGE) {
+			sp->unsync_children = 0;
+			sp = parent;
+		}
+	}
+	return 0;
+}
+
static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
{
unsigned index;
@@ -962,6 +996,47 @@ static struct kvm_mmu_page *kvm_mmu_look
return NULL;
}
+static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	WARN_ON(!sp->unsync);
+	sp->unsync = 0;
+	--kvm->stat.mmu_unsync;
+}
+
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	if (sp->role.glevels != vcpu->arch.mmu.root_level) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	kvm_mmu_flush_tlb(vcpu);
+	kvm_unlink_unsync_page(vcpu->kvm, sp);
+	return 0;
+}
+
+static int mmu_sync_fn(struct kvm_mmu_page *sp, void *priv)
+{
+	struct kvm_vcpu *vcpu = priv;
+
+	kvm_sync_page(vcpu, sp);
+	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	while (mmu_unsync_walk(sp, mmu_sync_fn, vcpu))
+		cond_resched_lock(&vcpu->kvm->mmu_lock);
+}
+
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
@@ -975,7 +1050,7 @@ static struct kvm_mmu_page *kvm_mmu_get_
unsigned quadrant;
struct hlist_head *bucket;
struct kvm_mmu_page *sp;
-	struct hlist_node *node;
+	struct hlist_node *node, *tmp;
role.word = 0;
role.glevels = vcpu->arch.mmu.root_level;
@@ -991,8 +1066,18 @@ static struct kvm_mmu_page *kvm_mmu_get_
gfn, role.word);
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
-	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && sp->role.word == role.word) {
+	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
+		if (sp->gfn == gfn) {
+			if (sp->unsync)
+				if (kvm_sync_page(vcpu, sp))
+					continue;
+
+			if (sp->role.word != role.word)
+				continue;
+
+			if (sp->unsync_children)
+				vcpu->arch.mmu.need_root_sync = 1;
+
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
pgprintk("%s: found\n", __func__);
return sp;
@@ -1112,14 +1197,45 @@ static void kvm_mmu_unlink_parents(struc
}
}
+struct mmu_zap_walk {
+	struct kvm *kvm;
+	int zapped;
+};
+
+static int mmu_zap_fn(struct kvm_mmu_page *sp, void *private)
+{
+	struct mmu_zap_walk *zap_walk = private;
+
+	kvm_mmu_zap_page(zap_walk->kvm, sp);
+	zap_walk->zapped = 1;
+	return 0;
+}
+
+static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	struct mmu_zap_walk mmu_zap_walk = {
+		.kvm = kvm,
+		.zapped = 0,
+	};
+
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		return 0;
+	mmu_unsync_walk(sp, mmu_zap_fn, &mmu_zap_walk);
+	return mmu_zap_walk.zapped;
+}
+
static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
+	int ret;
++kvm->stat.mmu_shadow_zapped;
+	ret = mmu_zap_unsync_children(kvm, sp);
kvm_mmu_page_unlink_children(kvm, sp);
kvm_mmu_unlink_parents(kvm, sp);
kvm_flush_remote_tlbs(kvm);
if (!sp->role.invalid && !sp->role.metaphysical)
unaccount_shadowed(kvm, sp->gfn);
+	if (sp->unsync)
+		kvm_unlink_unsync_page(kvm, sp);
if (!sp->root_count) {
hlist_del(&sp->hash_link);
kvm_mmu_free_page(kvm, sp);
@@ -1129,7 +1245,7 @@ static int kvm_mmu_zap_page(struct kvm *
kvm_reload_remote_mmus(kvm);
}
kvm_mmu_reset_last_pte_updated(kvm);
-	return 0;
+	return ret;
}
/*
@@ -1221,10 +1337,57 @@ struct page *gva_to_page(struct kvm_vcpu
return page;
}
+static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	sp->unsync_children = 1;
+	return 1;
+}
+
+static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *s;
+	struct hlist_node *node, *n;
+
+	index = kvm_page_table_hashfn(sp->gfn);
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	/* don't unsync if pagetable is shadowed with multiple roles */
+	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+		if (s->gfn != sp->gfn || s->role.metaphysical)
+			continue;
+		if (s->role.word != sp->role.word)
+			return 1;
+	}
+	mmu_parent_walk(vcpu, sp, unsync_walk_fn);
+	++vcpu->kvm->stat.mmu_unsync;
+	sp->unsync = 1;
+	return 0;
+}
+
+static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+				  bool can_unsync)
+{
+	struct kvm_mmu_page *shadow;
+
+	shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
+	if (shadow) {
+		if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+			return 1;
+		if (shadow->unsync)
+			return 0;
+		if (can_unsync)
+			return kvm_unsync_page(vcpu, shadow);
+		return 1;
+	}
+	return 0;
+}
+
static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative)
+		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    bool can_unsync)
{
u64 spte;
int ret = 0;
@@ -1251,7 +1414,6 @@ static int set_spte(struct kvm_vcpu *vcp
if ((pte_access & ACC_WRITE_MASK)
|| (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-		struct kvm_mmu_page *shadow;
if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
ret = 1;
@@ -1261,8 +1423,7 @@ static int set_spte(struct kvm_vcpu *vcp
spte |= PT_WRITABLE_MASK;
-		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-		if (shadow) {
+		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__func__, gfn);
ret = 1;
@@ -1280,7 +1441,6 @@ set_pte:
return ret;
}
-
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
@@ -1318,7 +1478,7 @@ static void mmu_set_spte(struct kvm_vcpu
}
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative)) {
+		      dirty, largepage, gfn, pfn, speculative, true)) {
if (write_fault)
*ptwrite = 1;
if (was_writeble)
@@ -1539,10 +1699,6 @@ static void mmu_alloc_roots(struct kvm_v
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
}
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-}
-
static void mmu_sync_roots(struct kvm_vcpu *vcpu)
{
int i;
Index: kvm/include/asm-x86/kvm_host.h
===================================================================
--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -195,6 +195,8 @@ struct kvm_mmu_page {
*/
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
+	bool unsync;
+	bool unsync_children;
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -226,6 +228,7 @@ struct kvm_mmu {
hpa_t root_hpa;
int root_level;
int shadow_root_level;
+	bool need_root_sync;
u64 *pae_root;
};
@@ -371,6 +374,7 @@ struct kvm_vm_stat {
u32 mmu_flooded;
u32 mmu_recycled;
u32 mmu_cache_miss;
+	u32 mmu_unsync;
u32 remote_tlb_flush;
u32 lpages;
};
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -101,6 +101,7 @@ struct kvm_stats_debugfs_item debugfs_en
{ "mmu_flooded", VM_STAT(mmu_flooded) },
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
+ { "mmu_unsync", VM_STAT(mmu_unsync) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ NULL }
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -449,6 +449,11 @@ static int FNAME(page_fault)(struct kvm_
kvm_mmu_audit(vcpu, "post page fault (fixed)");
spin_unlock(&vcpu->kvm->mmu_lock);
+	if (vcpu->arch.mmu.need_root_sync) {
+		kvm_mmu_sync_roots(vcpu);
+		vcpu->arch.mmu.need_root_sync = 0;
+	}
+
return write_pt;
out_unlock:
@@ -576,7 +581,7 @@ static int FNAME(sync_page)(struct kvm_v
pte_access = sp->role.access & FNAME(gpte_access)(vcpu,
gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_pte(gpte), 0, gfn,
-			 spte_to_pfn(sp->spt[i]), true);
+			 spte_to_pfn(sp->spt[i]), true, false);
}
}
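
For completeness, the write-protect decision that set_spte() now delegates to
mmu_need_write_protect() can be restated as the standalone sketch below
(toy_* names are invented for illustration; the real code additionally
refuses to unsync a gfn shadowed under more than one role, see
kvm_unsync_page()):

#include <stdbool.h>
#include <stdio.h>

struct toy_shadow {
	int level;		/* 1 == last-level guest pagetable */
	bool unsync;		/* already allowed to go out of sync */
};

/*
 * Return 1 if the new spte must be made read-only, 0 if a writable
 * mapping is fine.  'shadow' is the shadow page for the gfn being
 * mapped, or NULL if that gfn does not back a shadowed pagetable.
 */
static int toy_need_write_protect(struct toy_shadow *shadow, bool can_unsync)
{
	if (!shadow)
		return 0;		/* not a pagetable: no protection needed */
	if (shadow->level != 1)
		return 1;		/* only last-level tables may go unsync */
	if (shadow->unsync)
		return 0;		/* already out of sync */
	if (can_unsync) {
		shadow->unsync = true;	/* let the guest write it directly */
		return 0;
	}
	return 1;			/* e.g. the resync path: keep it protected */
}

int main(void)
{
	struct toy_shadow pt = { .level = 1, .unsync = false };

	printf("first write:  %s\n",
	       toy_need_write_protect(&pt, true) ? "protect" : "allow");
	printf("second write: %s\n",
	       toy_need_write_protect(&pt, true) ? "protect" : "allow");
	return 0;
}

Compiled standalone, the first call marks the toy page unsync and allows the
write; the second call takes the already-unsync fast path and also allows it.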