Add a stage-2 page table generator, the s2_mmu structure, and vEL2
stage-2 preparation code for a guest hypervisor to turn on stage-2
translation for its nested guest.

Signed-off-by: Wei-Lin Chang <[email protected]>
---
 .../selftests/kvm/arm64/hello_nested.c        |   2 +-
 .../selftests/kvm/arm64/shadow_stage2.c       |   2 +-
 .../selftests/kvm/include/arm64/nested.h      |  15 +-
 .../testing/selftests/kvm/lib/arm64/nested.c  | 145 +++++++++++++++++-
 4 files changed, 160 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/arm64/hello_nested.c b/tools/testing/selftests/kvm/arm64/hello_nested.c
index 9ed5285f5f2d..b57e41c73214 100644
--- a/tools/testing/selftests/kvm/arm64/hello_nested.c
+++ b/tools/testing/selftests/kvm/arm64/hello_nested.c
@@ -62,7 +62,7 @@ static void guest_code(void)
        l2_stack_top = ucall_translate_to_gpa(&l2_stack[L2STACKSZ]);
 
        init_vcpu(&vcpu, l2_pc, l2_stack_top);
-       prepare_hyp();
+       prepare_hyp_no_s2();
 
        ret = run_l2(&vcpu, &hyp_data);
        GUEST_ASSERT_EQ(ret, ARM_EXCEPTION_TRAP);
diff --git a/tools/testing/selftests/kvm/arm64/shadow_stage2.c b/tools/testing/selftests/kvm/arm64/shadow_stage2.c
index c5332b8b5683..2b274b810dcf 100644
--- a/tools/testing/selftests/kvm/arm64/shadow_stage2.c
+++ b/tools/testing/selftests/kvm/arm64/shadow_stage2.c
@@ -72,7 +72,7 @@ static void guest_code(void)
        l2_pc = ucall_translate_to_gpa(l2_guest_code);
 
        init_vcpu(&vcpu, l2_pc, l2_stack_top);
-       prepare_hyp();
+       prepare_hyp_no_s2();
 
        while (true) {
                GUEST_PRINTF("L2 enter\n");
diff --git a/tools/testing/selftests/kvm/include/arm64/nested.h b/tools/testing/selftests/kvm/include/arm64/nested.h
index fc59fabff12d..1bcbb31b8d67 100644
--- a/tools/testing/selftests/kvm/include/arm64/nested.h
+++ b/tools/testing/selftests/kvm/include/arm64/nested.h
@@ -38,6 +38,14 @@ struct vcpu {
        struct cpu_context context;
 };
 
+struct s2_mmu {
+       gpa_t pgd;                      /* root table base; ORed into VTTBR_EL2 */
+       unsigned int vmid;              /* placed in VTTBR_EL2 bits [63:48] */
+       unsigned int page_size_shift;   /* 12, 14 or 16; set by init_s2_mmu() */
+       u64 vtcr;                       /* value prepare_hyp() writes to VTCR_EL2 */
+       u64 ipa_bits;                   /* IPA width; T0SZ is 64 - ipa_bits */
+};
+
 /*
  * KVM has host_data and hyp_context, combine them because we're only doing
  * hyp context.
@@ -56,8 +64,13 @@ struct page_pool {
 size_t get_page_size(void);
 gpa_t alloc_page(struct page_pool *pp);
 bool has_tgran_2(u64 mmfr0, size_t size);
-void prepare_hyp(void);
+void prepare_hyp_no_s2(void);
+void prepare_hyp(struct s2_mmu *mmu);
 void init_vcpu(struct vcpu *vcpu, gpa_t l2_pc, gpa_t l2_stack_top);
+void create_s2_mapping(struct s2_mmu *mmu, u64 ipa, u64 pa, size_t size,
+                      struct page_pool *pp);
+void init_s2_mmu(struct s2_mmu *mmu, unsigned int vmid, gpa_t pgd,
+                size_t page_size, u64 ipa_bits);
 int run_l2(struct vcpu *vcpu, struct hyp_data *hyp_data);
 
 u64 do_hvc(u64 action, u64 arg1, u64 arg2);
diff --git a/tools/testing/selftests/kvm/lib/arm64/nested.c b/tools/testing/selftests/kvm/lib/arm64/nested.c
index cda41f355263..9848d607ef64 100644
--- a/tools/testing/selftests/kvm/lib/arm64/nested.c
+++ b/tools/testing/selftests/kvm/lib/arm64/nested.c
@@ -71,13 +71,22 @@ gpa_t alloc_page(struct page_pool *pp)
        }
 }
 
-void prepare_hyp(void)
+void prepare_hyp_no_s2(void)
 {
        write_sysreg(HCR_EL2_E2H | HCR_EL2_RW, hcr_el2);
        write_sysreg(hyp_vectors, vbar_el2);
        isb();
 }
 
+void prepare_hyp(struct s2_mmu *mmu)
+{
+       write_sysreg(mmu->vtcr, vtcr_el2);      /* value built by init_s2_mmu() */
+       write_sysreg(mmu->pgd | ((u64)mmu->vmid << 48), vttbr_el2); /* VMID in [63:48] */
+       write_sysreg(HCR_EL2_E2H | HCR_EL2_RW | HCR_EL2_VM, hcr_el2); /* _no_s2 bits + VM */
+       write_sysreg(hyp_vectors, vbar_el2);
+       isb();  /* issued once, after all four register writes */
+}
+
 void init_vcpu(struct vcpu *vcpu, gpa_t l2_pc, gpa_t l2_stack_top)
 {
        memset(vcpu, 0, sizeof(*vcpu));
@@ -86,6 +95,140 @@ void init_vcpu(struct vcpu *vcpu, gpa_t l2_pc, gpa_t l2_stack_top)
        vcpu->context.sys_regs[SP_EL1] = l2_stack_top;
 }
 
+static int stage2_levels(unsigned int page_size_shift, u64 ipa_bits)
+{
+       /* Number of table levels; same formula as KVM's ARM64_HW_PGTABLE_LEVELS(ipa). */
+       return (ipa_bits - 4) / (page_size_shift - 3);
+}
+
+static u64 get_index(struct s2_mmu *mmu, u64 ipa, int level)
+{
+       int width = mmu->page_size_shift - 3;   /* index bits per level (8-byte entries) */
+       int shift_amount = mmu->page_size_shift + (3 - level) * width; /* 3 = last level */
+
+       return (ipa >> shift_amount) & GENMASK_ULL(width - 1, 0);
+}
+
+static u64 pte_gpa_to_gva(u64 gpa)
+{
+       /*
+        * The conversion depends on how the memory used for the stage-2
+        * page tables is mapped in GVA; for now it is assumed to be idmapped.
+        */
+       return gpa;
+}
+
+static u64 pte_to_pt_base(u64 pte)
+{
+       return pte & GENMASK_ULL(47, 12);       /* keep bits [47:12], drop the rest */
+}
+
+#define S2_PTE_AF              (1ULL << 10)     /* bit 10 */
+#define S2_PTE_SH_INNER                (3ULL << 8)     /* bits [9:8] = 0b11 */
+#define S2_PTE_S2AP_RW         (3ULL << 6)     /* bits [7:6] = 0b11 */
+#define S2_PTE_ATTR_NORMAL_WB  (0xfULL << 2)   /* bits [5:2] = 0b1111 */
+#define S2_PTE_TYPE_TABLE      (1ULL << 1)     /* bit 1; used for non-leaf entries */
+#define S2_PTE_TYPE_PAGE       (1ULL << 1)     /* bit 1 too; used for level-3 leaves */
+#define S2_PTE_VALID           1ULL            /* bit 0; clear means entry is empty */
+
+/* Map one page @ipa -> @pa, allocating tables from @pp. No block mappings for now. */
+static void create_one_s2_mapping(struct s2_mmu *mmu, u64 ipa, u64 pa,
+                                 struct page_pool *pp)
+{
+       int levels = stage2_levels(mmu->page_size_shift, mmu->ipa_bits);
+       u64 index, pte, pte_new, table_attr, page_attr;
+       gpa_t pte_addr, pt_base = mmu->pgd;     /* walk starts at the root table */
+
+       table_attr = S2_PTE_TYPE_TABLE | S2_PTE_VALID;
+       page_attr = S2_PTE_AF | S2_PTE_SH_INNER | S2_PTE_S2AP_RW |
+                   S2_PTE_ATTR_NORMAL_WB | S2_PTE_TYPE_PAGE | S2_PTE_VALID;
+
+       for (int level = 4 - levels; level <= 3; level++) {
+               index = get_index(mmu, ipa, level);
+               pte_addr = pt_base + index * 8;         /* entries are 8 bytes each */
+               pte = *((u64 *)pte_gpa_to_gva(pte_addr));
+
+               if (level == 3) {
+                       /* Last level, install leaf entry (overwrites any old one). */
+                       pte_new = pa & ~GENMASK_ULL(mmu->page_size_shift - 1, 0);
+                       pte_new |= page_attr;
+                       *((u64 *)pte_gpa_to_gva(pte_addr)) = pte_new;
+               } else if (!(pte & S2_PTE_VALID)) {
+                       /* Empty next level table, allocate and install. */
+                       pte_new = alloc_page(pp);
+                       pte_new |= table_attr;
+                       *((u64 *)pte_gpa_to_gva(pte_addr)) = pte_new;
+                       pt_base = pte_to_pt_base(pte_new);
+               } else {
+                       /* Next level table found, descend into it. */
+                       pt_base = pte_to_pt_base(pte);
+               }
+       }
+}
+
+void create_s2_mapping(struct s2_mmu *mmu, u64 ipa, u64 pa, size_t size,
+                      struct page_pool *pp)
+{
+       u64 ipa_end;
+       u64 mask = pp->page_size - 1;   /* page size comes from the pool, not @mmu */
+
+       ipa_end = (ipa + size + mask) & ~mask;  /* round end up (power-of-two size assumed) */
+       ipa &= ~mask;                           /* round both start addresses down */
+       pa &= ~mask;
+
+       while (ipa < ipa_end) {
+               create_one_s2_mapping(mmu, ipa, pa, pp);        /* one page per call */
+               pa += pp->page_size;
+               ipa += pp->page_size;
+       }
+       dsb(ishst);     /* issued once, after every PTE store made by the loop */
+}
+
+void init_s2_mmu(struct s2_mmu *mmu, unsigned int vmid, gpa_t pgd,
+                size_t page_size, u64 ipa_bits)
+{
+       u64 ps, tg0, sl0_base, mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+       int levels;
+
+       mmu->vmid = vmid;
+       mmu->pgd = pgd;
+       mmu->ipa_bits = ipa_bits;
+       mmu->vtcr = 0;                  /* VTCR_EL2 value is built up field by field */
+
+       switch (page_size) {            /* sl0_base: SL0 value for a 4-level walk */
+       case SZ_4K:
+               tg0 = VTCR_EL2_TG0_4K;
+               mmu->page_size_shift = 12;
+               sl0_base = 2;
+               break;
+       case SZ_16K:
+               tg0 = VTCR_EL2_TG0_16K;
+               mmu->page_size_shift = 14;
+               sl0_base = 3;
+               break;
+       case SZ_64K:
+       default:                        /* any other page_size is treated as 64K */
+               tg0 = VTCR_EL2_TG0_64K;
+               mmu->page_size_shift = 16;
+               sl0_base = 3;
+               break;
+       }
+
+       levels = stage2_levels(mmu->page_size_shift, mmu->ipa_bits);
+       mmu->vtcr |= FIELD_PREP(VTCR_EL2_SL0, (sl0_base - (4 - levels)));
+
+       ps = SYS_FIELD_GET(ID_AA64MMFR0_EL1, PARANGE, mmfr0);
+       /* Cap PS at 0b0101 (48-bit), even if PARANGE reports a larger range. */
+       ps = ps > 0b0101 ? 0b0101 : ps;
+       mmu->vtcr |= VTCR_EL2_RES1 | SYS_FIELD_PREP(VTCR_EL2, PS, ps)          |
+                                   SYS_FIELD_PREP(VTCR_EL2, TG0, tg0)         |
+                                   SYS_FIELD_PREP_ENUM(VTCR_EL2, SH0, INNER)  |
+                                   SYS_FIELD_PREP_ENUM(VTCR_EL2, ORGN0, WBWA) |
+                                   SYS_FIELD_PREP_ENUM(VTCR_EL2, IRGN0, WBWA);
+
+       mmu->vtcr |= FIELD_PREP(VTCR_EL2_T0SZ, 64 - ipa_bits);
+}
+
 void __sysreg_save_el1_state(struct cpu_context *ctxt)
 {
        ctxt->sys_regs[SP_EL1] = read_sysreg(sp_el1);
-- 
2.43.0


Reply via email to