On 6/6/24 07:02, Don Porter wrote:
+/**
+ * _for_each_pte - recursive helper function
+ *
+ * @cs - CPU state
+ * @fn(cs, data, pte, vaddr, height) - User-provided function to call on each
+ *                                     pte.
+ *   * @cs - pass through cs
+ *   * @data - user-provided, opaque pointer
+ *   * @pte - current pte
+ *   * @vaddr_in - virtual address translated by pte
+ *   * @height - height in the tree of pte
+ * @data - user-provided, opaque pointer, passed to fn()
+ * @visit_interior_nodes - if true, call fn() on page table entries in
+ *                         interior nodes.  If false, only call fn() on page
+ *                         table entries in leaves.
+ * @visit_not_present - if true, call fn() on entries that are not present.
+ *                         if false, visit only present entries.
+ * @node - The physical address of the current page table radix tree node
+ * @vaddr_in - The virtual address bits translated in walking the page
+ *          table to node
+ * @height - The height of node in the radix tree
+ *
+ * height starts at the max and counts down.
+ * In a 4 level x86 page table, pml4e is level 4, pdpe is level 3,
+ *  pde is level 2, and pte is level 1
+ *
+ * Returns true on success, false on error.
+ */
+static bool
+_for_each_pte(CPUState *cs,

External identifiers beginning with "_[a-z]" are reserved. While this is not external, and so is technically ok, it is still not a great idea. Use for_each_pte_$SUFFIX, with SUFFIX as int, internal, recurse or something.


+              int (*fn)(CPUState *cs, void *data, PTE_t *pte,
+                        vaddr vaddr_in, int height, int offset),
+              void *data, bool visit_interior_nodes,
+              bool visit_not_present, hwaddr node,
+              vaddr vaddr_in, int height)
+{
+    int ptes_per_node;
+    int i;
+
+    assert(height > 0);
+
+    CPUClass *cc = CPU_GET_CLASS(cs);
+
+    if ((!cc->sysemu_ops->page_table_entries_per_node)
+        || (!cc->sysemu_ops->get_pte)
+        || (!cc->sysemu_ops->pte_present)
+        || (!cc->sysemu_ops->pte_leaf)
+        || (!cc->sysemu_ops->pte_child)) {
+        return false;
+    }

Probably move this check to the non-recursive function, since it only needs to be done once. Following a check for page_table_root, should the rest be asserts? I.e. if a target implements one hook, it must implement them all?

CPU_GET_CLASS is non-trivial: it is cached in cs->cc.
It seems like you should cache cs->cc->sysemu_ops in a local to avoid constantly reloading the pointer across the loop.

+    ptes_per_node = cc->sysemu_ops->page_table_entries_per_node(cs, height);

It would be better if the entire page table format were computed by page_table_root. Perhaps fill in a whole structure rather than just height.

+    for (i = 0; i < ptes_per_node; i++) {
+        PTE_t pt_entry;
+        vaddr vaddr_i;
+        bool pte_present;
+
+        cc->sysemu_ops->get_pte(cs, node, i, height, &pt_entry, vaddr_in,
+                                &vaddr_i, NULL);
+        pte_present = cc->sysemu_ops->pte_present(cs, &pt_entry);
+
+        if (pte_present || visit_not_present) {
+            if ((!pte_present) || cc->sysemu_ops->pte_leaf(cs, height,
+                                                           &pt_entry)) {

Again, better to fill in a structure in get_pte rather than make 4 calls for vaddr, present, leaf, child.

Drop the unnecessary (), e.g. around !pte_present.

+/* Intended to become a generic PTE type */
+typedef union PTE {
+    uint64_t pte64_t;
+    uint32_t pte32_t;
+} PTE_t;

While not yet supported by qemu, the latest Arm v9.4 document contains a 
128-bit PTE.
Don't give the tag and the typedef different names.
Avoid "_t" as that's POSIX reserved namespace.

I know naming is hard, but there are many kinds of PTE in qemu -- better to use something more descriptive. Perhaps "QemuPageWalkerPTE"?

+bool for_each_pte(CPUState *cs,
+                  int (*fn)(CPUState *cs, void *data, PTE_t *pte, vaddr vaddr,
+                            int height, int offset), void *data,
+                  bool visit_interior_nodes, bool visit_not_present);

Use a typedef for the callback function.
qemu_page_walker_for_each?

+
+
  /**
   * CPUDumpFlags:
   * @CPU_DUMP_CODE:
diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h
index 24d003fe04..eb16a1c3e2 100644
--- a/include/hw/core/sysemu-cpu-ops.h
+++ b/include/hw/core/sysemu-cpu-ops.h
@@ -12,6 +12,39 @@
#include "hw/core/cpu.h" +/* Maximum supported page table height - currently x86 at 5 */
+#define MAX_HEIGHT 5
+
+/*
+ * struct mem_print_state: Used by monitor in walking page tables.
+ */
+struct mem_print_state {
+    Monitor *mon;
+    CPUArchState *env;
+    int vaw, paw; /* VA and PA width in characters */
+    int max_height;
+    bool (*flusher)(CPUState *cs, struct mem_print_state *state);
+    bool flush_interior; /* If false, only call flusher() on leaves */
+    bool require_physical_contiguity;
+    /*
+     * The height at which we started accumulating ranges, i.e., the
+     * next height we need to print once we hit the end of a
+     * contiguous range.
+     */
+    int start_height;
+    /*
+     * For compressing contiguous ranges, track the
+     * start and end of the range
+     */
+    hwaddr vstart[MAX_HEIGHT + 1]; /* Starting virt. addr. of open pte range */
+    hwaddr vend[MAX_HEIGHT + 1]; /* Ending virtual address of open pte range */
+    hwaddr pstart; /* Starting physical address of open pte range */
+    hwaddr pend; /* Ending physical address of open pte range */
+    int64_t ent[MAX_HEIGHT + 1]; /* PTE contents on current root->leaf path */

PTE_t (or the rename)?

+    int offset[MAX_HEIGHT + 1]; /* PTE range starting offsets */
+    int last_offset[MAX_HEIGHT + 1]; /* PTE range ending offsets */
+};
+
  /*
   * struct SysemuCPUOps: System operations specific to a CPU class
   */
@@ -87,6 +120,129 @@ typedef struct SysemuCPUOps {
       */
      const VMStateDescription *legacy_vmsd;
+ /**
+     * page_table_root - Given a CPUState, return the physical address
+     *                    of the current page table root, as well as
+     *                    write the height of the tree into *height.
+     *
+     * @cs - CPU state
+
+     * @height - a pointer to an integer, to store the page table tree
+     *           height
+     *
+     * Returns a hardware address on success.  Should not fail (i.e.,
+     * caller is responsible to ensure that a page table is actually
+     * present).
+     */
+    hwaddr (*page_table_root)(CPUState *cs, int *height);

There may be two page table roots on Arm, depending on the cpu state and the virtual address. The shape of the two page tables are independent so...


+
+    /**
+     * page_table_entries_per_node - Return the number of entries in a
+     *                                   page table node for the CPU
+     *                                   at a given height.
+     *
+     * @cs - CPU state
+     * @height - height of the page table tree to query, where the leaves
+     *          are 1.
+     *
+     * Returns a value greater than zero on success, -1 on error.
+     */
+    int (*page_table_entries_per_node)(CPUState *cs, int height);

... implementing this independently from page_table_root is not possible.


+
+    /**
+     * @mon_init_page_table_iterator: Callback to configure a page table
+     * iterator for use by a monitor function.
+     * Returns true on success, false if not supported (e.g., no paging 
disabled
+     * or not implemented on this CPU).
+     */
+    bool (*mon_init_page_table_iterator)(Monitor *mon,
+                                         struct mem_print_state *state);

I don't understand the purpose of this one as a target-specific hook.

+    /**
+     * @flush_page_table_iterator_state: Prints the last entry,
+     * if one is present.  Useful for iterators that aggregate information
+     * across page table entries.
+     */
+    bool (*mon_flush_page_print_state)(CPUState *cs,
+                                       struct mem_print_state *state);

Is this specific to "info pg" or not?
It appears to be, but the description suggests it is not.


r~

Reply via email to