On 6/6/24 07:02, Don Porter wrote:
+/**
+ * _for_each_pte - recursive helper function
+ *
+ * @cs - CPU state
+ * @fn(cs, data, pte, vaddr, height) - User-provided function to call on each
+ * pte.
+ * * @cs - pass through cs
+ * * @data - user-provided, opaque pointer
+ * * @pte - current pte
+ * * @vaddr_in - virtual address translated by pte
+ * * @height - height in the tree of pte
+ * @data - user-provided, opaque pointer, passed to fn()
+ * @visit_interior_nodes - if true, call fn() on page table entries in
+ * interior nodes. If false, only call fn() on page
+ * table entries in leaves.
+ * @visit_not_present - if true, call fn() on entries that are not present.
+ * if false, visit only present entries.
+ * @node - The physical address of the current page table radix tree node
+ * @vaddr_in - The virtual address bits translated in walking the page
+ * table to node
+ * @height - The height of node in the radix tree
+ *
+ * height starts at the max and counts down.
+ * In a 4 level x86 page table, pml4e is level 4, pdpe is level 3,
+ * pde is level 2, and pte is level 1
+ *
+ * Returns true on success, false on error.
+ */
+static bool
+_for_each_pte(CPUState *cs,
External identifiers beginning with "_[a-z]" are reserved. While this is not external,
and so is technically ok, it is still not a great idea. Use for_each_pte_$SUFFIX, with
SUFFIX as int, internal, recurse or something.
+ int (*fn)(CPUState *cs, void *data, PTE_t *pte,
+ vaddr vaddr_in, int height, int offset),
+ void *data, bool visit_interior_nodes,
+ bool visit_not_present, hwaddr node,
+ vaddr vaddr_in, int height)
+{
+ int ptes_per_node;
+ int i;
+
+ assert(height > 0);
+
+ CPUClass *cc = CPU_GET_CLASS(cs);
+
+ if ((!cc->sysemu_ops->page_table_entries_per_node)
+ || (!cc->sysemu_ops->get_pte)
+ || (!cc->sysemu_ops->pte_present)
+ || (!cc->sysemu_ops->pte_leaf)
+ || (!cc->sysemu_ops->pte_child)) {
+ return false;
+ }
Probably move this check to the non-recursive function, since it only needs to be done
once. Following a check for page_table_root, should the rest be asserts? I.e. if a
target implements one hook, it must implement them all?
CPU_GET_CLASS is non-trivial: it is cached in cs->cc.
It seems like you should cache cs->cc->sysemu_ops in a local to avoid constantly reloading
the pointer across the loop.
+ ptes_per_node = cc->sysemu_ops->page_table_entries_per_node(cs, height);
It would be better if the entire page table format were computed by page_table_root.
Perhaps fill in a whole structure rather than just height.
+ for (i = 0; i < ptes_per_node; i++) {
+ PTE_t pt_entry;
+ vaddr vaddr_i;
+ bool pte_present;
+
+ cc->sysemu_ops->get_pte(cs, node, i, height, &pt_entry, vaddr_in,
+ &vaddr_i, NULL);
+ pte_present = cc->sysemu_ops->pte_present(cs, &pt_entry);
+
+ if (pte_present || visit_not_present) {
+ if ((!pte_present) || cc->sysemu_ops->pte_leaf(cs, height,
+ &pt_entry)) {
Again, better to fill in a structure in get_pte rather than make 4 calls for vaddr,
present, leaf, child.
Drop the unnecessary (), e.g. around !pte_present.
+/* Intended to become a generic PTE type */
+typedef union PTE {
+ uint64_t pte64_t;
+ uint32_t pte32_t;
+} PTE_t;
While not yet supported by qemu, the latest Arm v9.4 document contains a
128-bit PTE.
Don't give the tag and the typedef different names.
Avoid "_t" as that's POSIX reserved namespace.
I know naming is hard, but there are many kinds of PTE in qemu -- better to use something
more descriptive. Perhaps "QemuPageWalkerPTE"?
+bool for_each_pte(CPUState *cs,
+ int (*fn)(CPUState *cs, void *data, PTE_t *pte, vaddr vaddr,
+ int height, int offset), void *data,
+ bool visit_interior_nodes, bool visit_not_present);
Use a typedef for the callback function.
qemu_page_walker_for_each?
+
+
/**
* CPUDumpFlags:
* @CPU_DUMP_CODE:
diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h
index 24d003fe04..eb16a1c3e2 100644
--- a/include/hw/core/sysemu-cpu-ops.h
+++ b/include/hw/core/sysemu-cpu-ops.h
@@ -12,6 +12,39 @@
#include "hw/core/cpu.h"
+/* Maximum supported page table height - currently x86 at 5 */
+#define MAX_HEIGHT 5
+
+/*
+ * struct mem_print_state: Used by monitor in walking page tables.
+ */
+struct mem_print_state {
+ Monitor *mon;
+ CPUArchState *env;
+ int vaw, paw; /* VA and PA width in characters */
+ int max_height;
+ bool (*flusher)(CPUState *cs, struct mem_print_state *state);
+ bool flush_interior; /* If false, only call flusher() on leaves */
+ bool require_physical_contiguity;
+ /*
+ * The height at which we started accumulating ranges, i.e., the
+ * next height we need to print once we hit the end of a
+ * contiguous range.
+ */
+ int start_height;
+ /*
+ * For compressing contiguous ranges, track the
+ * start and end of the range
+ */
+ hwaddr vstart[MAX_HEIGHT + 1]; /* Starting virt. addr. of open pte range */
+ hwaddr vend[MAX_HEIGHT + 1]; /* Ending virtual address of open pte range */
+ hwaddr pstart; /* Starting physical address of open pte range */
+ hwaddr pend; /* Ending physical address of open pte range */
+ int64_t ent[MAX_HEIGHT + 1]; /* PTE contents on current root->leaf path */
PTE_t (or the rename)?
+ int offset[MAX_HEIGHT + 1]; /* PTE range starting offsets */
+ int last_offset[MAX_HEIGHT + 1]; /* PTE range ending offsets */
+};
+
/*
* struct SysemuCPUOps: System operations specific to a CPU class
*/
@@ -87,6 +120,129 @@ typedef struct SysemuCPUOps {
*/
const VMStateDescription *legacy_vmsd;
+ /**
+ * page_table_root - Given a CPUState, return the physical address
+ * of the current page table root, as well as
+ * write the height of the tree into *height.
+ *
+ * @cs - CPU state
+
+ * @height - a pointer to an integer, to store the page table tree
+ * height
+ *
+ * Returns a hardware address on success. Should not fail (i.e.,
+ * caller is responsible to ensure that a page table is actually
+ * present).
+ */
+ hwaddr (*page_table_root)(CPUState *cs, int *height);
There may be two page table roots on Arm, depending on the cpu state and the virtual
address. The shape of the two page tables are independent so...
+
+ /**
+ * page_table_entries_per_node - Return the number of entries in a
+ * page table node for the CPU
+ * at a given height.
+ *
+ * @cs - CPU state
+ * @height - height of the page table tree to query, where the leaves
+ * are 1.
+ *
+ * Returns a value greater than zero on success, -1 on error.
+ */
+ int (*page_table_entries_per_node)(CPUState *cs, int height);
... implementing this independently from page_table_root is not possible.
+
+ /**
+ * @mon_init_page_table_iterator: Callback to configure a page table
+ * iterator for use by a monitor function.
+ * Returns true on success, false if not supported (e.g., no paging
disabled
+ * or not implemented on this CPU).
+ */
+ bool (*mon_init_page_table_iterator)(Monitor *mon,
+ struct mem_print_state *state);
I don't understand the purpose of this one as a target-specific hook.
+ /**
+ * @flush_page_table_iterator_state: Prints the last entry,
+ * if one is present. Useful for iterators that aggregate information
+ * across page table entries.
+ */
+ bool (*mon_flush_page_print_state)(CPUState *cs,
+ struct mem_print_state *state);
Is this specific to "info pg" or not?
It appears to be, but the description suggests it is not.
r~