[PATCH] libxg: shrink variable scope in xc_core_arch_map_p2m_list_rw()
This in particular allows to drop a dead assignment to "ptes" from near the end of the function. Coverity ID: 1532314 Fixes: bd7a29c3d0b9 ("tools/libs/ctrl: fix xc_core_arch_map_p2m() to support linear p2m table") Signed-off-by: Jan Beulich --- v2: Much bigger change to limit the scope of "ptes" and other variables. --- a/tools/libs/guest/xg_core_x86.c +++ b/tools/libs/guest/xg_core_x86.c @@ -92,11 +92,9 @@ xc_core_arch_map_p2m_list_rw(xc_interfac uint64_t p2m_cr3) { uint64_t p2m_vaddr, p2m_end, mask, off; -xen_pfn_t p2m_mfn, mfn, saved_mfn, max_pfn; -uint64_t *ptes = NULL; +xen_pfn_t p2m_mfn, saved_mfn; xen_pfn_t *mfns = NULL; -unsigned int fpp, n_pages, level, n_levels, shift, - idx_start, idx_end, idx, saved_idx; +unsigned int fpp, level, n_levels, idx_start, idx_end, saved_idx; p2m_vaddr = GET_FIELD(live_shinfo, arch.p2m_vaddr, dinfo->guest_width); fpp = PAGE_SIZE / dinfo->guest_width; @@ -152,8 +150,10 @@ xc_core_arch_map_p2m_list_rw(xc_interfac for ( level = n_levels; level > 0; level-- ) { -n_pages = idx_end - idx_start + 1; -ptes = xc_map_foreign_pages(xch, dom, PROT_READ, mfns, n_pages); +unsigned int n_pages = idx_end - idx_start + 1; +uint64_t *ptes = xc_map_foreign_pages(xch, dom, PROT_READ, mfns, n_pages); +unsigned int shift, idx; + if ( !ptes ) { PERROR("Failed to map %u page table pages for p2m list", n_pages); @@ -169,18 +169,21 @@ xc_core_arch_map_p2m_list_rw(xc_interfac if ( !mfns ) { ERROR("Cannot allocate memory for array of %u mfns", idx); +out_unmap: +munmap(ptes, n_pages * PAGE_SIZE); goto out; } for ( idx = idx_start; idx <= idx_end; idx++ ) { -mfn = (ptes[idx] & 0x000ff000ULL) >> PAGE_SHIFT; +xen_pfn_t mfn = (ptes[idx] & 0x000ff000ULL) >> PAGE_SHIFT; + if ( mfn == 0 ) { ERROR("Bad mfn %#lx during page table walk for vaddr %#" PRIx64 " at level %d of p2m list", mfn, off + ((uint64_t)idx << shift), level); errno = ERANGE; -goto out; +goto out_unmap; } mfns[idx - idx_start] = mfn; @@ -197,6 +200,8 @@ xc_core_arch_map_p2m_list_rw(xc_interfac 
if ( level == 2 ) { +xen_pfn_t max_pfn; + if ( saved_idx == idx_end ) saved_idx++; max_pfn = ((xen_pfn_t)saved_idx << 9) * fpp; @@ -210,7 +215,6 @@ xc_core_arch_map_p2m_list_rw(xc_interfac } munmap(ptes, n_pages * PAGE_SIZE); -ptes = NULL; off = p2m_vaddr & ((mask >> shift) << shift); } @@ -218,8 +222,6 @@ xc_core_arch_map_p2m_list_rw(xc_interfac out: free(mfns); -if ( ptes ) -munmap(ptes, n_pages * PAGE_SIZE); return NULL; }
[PATCH] xen: Append a newline character to panic() where missing
Missing newline is inconsistent with the rest of the callers, since panic() expects it. Signed-off-by: Michal Orzel --- xen/arch/arm/bootfdt.c| 2 +- xen/arch/arm/domain_build.c | 6 +++--- xen/arch/x86/cpu/microcode/core.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c index b6f92a174f5f..2673ad17a1e1 100644 --- a/xen/arch/arm/bootfdt.c +++ b/xen/arch/arm/bootfdt.c @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const void *fdt, int node, size_cells, data); if ( rc == -ENOSPC ) -panic("Max number of supported reserved-memory regions reached."); +panic("Max number of supported reserved-memory regions reached.\n"); else if ( rc != -ENOENT ) return rc; return 0; diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c index 579bc8194fed..d0d6be922db1 100644 --- a/xen/arch/arm/domain_build.c +++ b/xen/arch/arm/domain_build.c @@ -74,7 +74,7 @@ int __init parse_arch_dom0_param(const char *s, const char *e) return 0; #else -panic("'sve' property found, but CONFIG_ARM64_SVE not selected"); +panic("'sve' property found, but CONFIG_ARM64_SVE not selected\n"); #endif } @@ -697,7 +697,7 @@ static void __init allocate_static_memory(struct domain *d, return; fail: -panic("Failed to allocate requested static memory for domain %pd.", d); +panic("Failed to allocate requested static memory for domain %pd.\n", d); } /* @@ -769,7 +769,7 @@ static void __init assign_static_memory_11(struct domain *d, return; fail: -panic("Failed to assign requested static memory for direct-map domain %pd.", +panic("Failed to assign requested static memory for direct-map domain %pd.\n", d); } diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c index e65af4b82ea3..c3fee629063e 100644 --- a/xen/arch/x86/cpu/microcode/core.c +++ b/xen/arch/x86/cpu/microcode/core.c @@ -524,7 +524,7 @@ static int control_thread_fn(const struct microcode_patch *patch) */ if ( 
wait_for_condition(wait_cpu_callout, (done + 1), MICROCODE_UPDATE_TIMEOUT_US) ) -panic("Timeout when finished updating microcode (finished %u/%u)", +panic("Timeout when finished updating microcode (finished %u/%u)\n", done, nr_cores); /* Print warning message once if long time is spent here */ base-commit: 2f69ef96801f0d2b9646abf6396e60f99c56e3a0 -- 2.25.1
[PATCH v4 0/2] x86: xen: add missing prototypes
Avoid missing prototype warnings. Arnd Bergmann (1): x86: xen: add missing prototypes Juergen Gross (1): x86/xen: add prototypes for paravirt mmu functions arch/x86/xen/efi.c | 2 ++ arch/x86/xen/mmu_pv.c | 16 arch/x86/xen/smp.h | 4 arch/x86/xen/smp_pv.c | 1 - arch/x86/xen/xen-ops.h | 3 +++ include/xen/xen.h | 3 +++ 6 files changed, 28 insertions(+), 1 deletion(-) -- 2.35.3
[PATCH v4 1/2] x86/xen: add prototypes for paravirt mmu functions
The paravirt MMU functions called via the PV_CALLEE_SAVE_REGS_THUNK() macro can't be defined to be static, as the macro is generating a function via asm() statement calling the paravirt MMU function. In order to avoid warnings when specifying "-Wmissing-prototypes" for the build, add local prototypes (there should never be any external caller of those functions). Reported-by: Arnd Bergmann Signed-off-by: Juergen Gross --- arch/x86/xen/mmu_pv.c | 16 1 file changed, 16 insertions(+) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index b3b8d289b9ab..e0a975165de7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -86,6 +86,22 @@ #include "mmu.h" #include "debugfs.h" +/* + * Prototypes for functions called via PV_CALLEE_SAVE_REGS_THUNK() in order + * to avoid warnings with "-Wmissing-prototypes". + */ +pteval_t xen_pte_val(pte_t pte); +pgdval_t xen_pgd_val(pgd_t pgd); +pmdval_t xen_pmd_val(pmd_t pmd); +pudval_t xen_pud_val(pud_t pud); +p4dval_t xen_p4d_val(p4d_t p4d); +pte_t xen_make_pte(pteval_t pte); +pgd_t xen_make_pgd(pgdval_t pgd); +pmd_t xen_make_pmd(pmdval_t pmd); +pud_t xen_make_pud(pudval_t pud); +p4d_t xen_make_p4d(p4dval_t p4d); +pte_t xen_make_pte_init(pteval_t pte); + #ifdef CONFIG_X86_VSYSCALL_EMULATION /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -- 2.35.3
[PATCH v4 2/2] x86: xen: add missing prototypes
From: Arnd Bergmann These functions are all called from assembler files, or from inline assembler, so there is no immediate need for a prototype in a header, but if -Wmissing-prototypes is enabled, the compiler warns about them: arch/x86/xen/efi.c:130:13: error: no previous prototype for 'xen_efi_init' [-Werror=missing-prototypes] arch/x86/platform/pvh/enlighten.c:120:13: error: no previous prototype for 'xen_prepare_pvh' [-Werror=missing-prototypes] arch/x86/xen/enlighten_pv.c:1233:34: error: no previous prototype for 'xen_start_kernel' [-Werror=missing-prototypes] arch/x86/xen/irq.c:22:14: error: no previous prototype for 'xen_force_evtchn_callback' [-Werror=missing-prototypes] arch/x86/entry/common.c:302:24: error: no previous prototype for 'xen_pv_evtchn_do_upcall' [-Werror=missing-prototypes] Declare all of them in an appropriate header file to avoid the warnings. For consistency, also move the asm_cpu_bringup_and_idle() declaration out of smp_pv.c. Signed-off-by: Arnd Bergmann Signed-off-by: Juergen Gross --- v4: [jgross]: remove some stubs, carve out mmu_pv.c prototypes into other patch v3: move declarations of conditional function in an #ifdef with a stub v2: fix up changelog --- arch/x86/xen/efi.c | 2 ++ arch/x86/xen/smp.h | 4 arch/x86/xen/smp_pv.c | 1 - arch/x86/xen/xen-ops.h | 3 +++ include/xen/xen.h | 3 +++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 7d7ffb9c826a..863d0d6b3edc 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -16,6 +16,8 @@ #include #include +#include "xen-ops.h" + static efi_char16_t vendor[100] __initdata; static efi_system_table_t efi_systab_xen __initdata = { diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 22fb982ff971..c20cbb14c82b 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -2,6 +2,10 @@ #ifndef _XEN_SMP_H #ifdef CONFIG_SMP + +void asm_cpu_bringup_and_idle(void); +asmlinkage void cpu_bringup_and_idle(void); + extern void 
xen_send_IPI_mask(const struct cpumask *mask, int vector); extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index a9cf8c8fa074..9ccf9759870d 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -55,7 +55,6 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); -void asm_cpu_bringup_and_idle(void); static void cpu_bringup(void) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a10903785a33..1bbbe216e6e1 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -148,9 +148,12 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), void xen_pin_vcpu(int cpu); void xen_emergency_restart(void); +void xen_force_evtchn_callback(void); + #ifdef CONFIG_XEN_PV void xen_pv_pre_suspend(void); void xen_pv_post_suspend(int suspend_cancelled); +void xen_start_kernel(struct start_info *si); #else static inline void xen_pv_pre_suspend(void) {} static inline void xen_pv_post_suspend(int suspend_cancelled) {} diff --git a/include/xen/xen.h b/include/xen/xen.h index 0efeb652f9b8..f989162983c3 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -31,6 +31,9 @@ extern uint32_t xen_start_flags; #include extern struct hvm_start_info pvh_start_info; +void xen_prepare_pvh(void); +struct pt_regs; +void xen_pv_evtchn_do_upcall(struct pt_regs *regs); #ifdef CONFIG_XEN_DOM0 #include -- 2.35.3
[PATCH 1/2] tools/console: Add escape argument to configure escape character
From: Peter Hoyes Dom0 may be accessed via telnet, meaning the default escape character (which is the same as telnet's) cannot be directly used to exit the console. It would be helpful to make the escape character customizable in such use cases. Add --escape argument to console tool for this purpose. Create parse_escape_character static function to convert a character string (which may include a '^' modifier) into an ANSI integer. Add argument to getopt options, parse escape character and pass value to console_loop. If --escape is not specified, it falls back to the existing behavior using DEFAULT_ESCAPE_SEQUENCE. Issue-Id: SCM-4958 Signed-off-by: Peter Hoyes Change-Id: I3795e654b382e78144d8210f303e3ebccec457ed --- tools/console/client/main.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/console/client/main.c b/tools/console/client/main.c index 6775006488..fb7cfb04b5 100644 --- a/tools/console/client/main.c +++ b/tools/console/client/main.c @@ -42,7 +42,7 @@ #include #include "xenctrl.h" -#define ESCAPE_CHARACTER 0x1d +#define DEFAULT_ESCAPE_CHARACTER 0x1d static volatile sig_atomic_t received_signal = 0; static char lockfile[sizeof (XEN_LOCK_DIR "/xenconsole.") + 8] = { 0 }; @@ -77,6 +77,7 @@ static void usage(const char *program) { " -n, --num N use console number N\n" " --type TYPE console type. 
must be 'pv', 'serial' or 'vuart'\n" " --start-notify-fd N file descriptor used to notify parent\n" + " --escape E escape sequence to exit console\n" , program); } @@ -174,7 +175,7 @@ static void restore_term(int fd, struct termios *old) } static int console_loop(int fd, struct xs_handle *xs, char *pty_path, - bool interactive) + bool interactive, char escape_character) { int ret, xs_fd = xs_fileno(xs), max_fd = -1; @@ -215,7 +216,7 @@ static int console_loop(int fd, struct xs_handle *xs, char *pty_path, char msg[60]; len = read(STDIN_FILENO, msg, sizeof(msg)); - if (len == 1 && msg[0] == ESCAPE_CHARACTER) { + if (len == 1 && msg[0] == escape_character) { return 0; } @@ -335,6 +336,7 @@ int main(int argc, char **argv) { "help",0, 0, 'h' }, { "start-notify-fd", 1, 0, 's' }, { "interactive", 0, 0, 'i' }, + { "escape", 1, 0, 'e' }, { 0 }, }; @@ -345,6 +347,7 @@ int main(int argc, char **argv) console_type type = CONSOLE_INVAL; bool interactive = 0; const char *console_names = "serial, pv, vuart"; + char escape_character = DEFAULT_ESCAPE_CHARACTER; while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch(ch) { @@ -375,6 +378,12 @@ int main(int argc, char **argv) case 'i': interactive = 1; break; + case 'e': + if (optarg[0] == '^') + escape_character = optarg[1] & 0x1f; + else + escape_character = optarg[0]; + break; default: fprintf(stderr, "Invalid argument\n"); fprintf(stderr, "Try `%s --help' for more information.\n", @@ -493,7 +502,7 @@ int main(int argc, char **argv) close(start_notify_fd); } - console_loop(spty, xs, path, interactive); + console_loop(spty, xs, path, interactive, escape_character); free(path); free(dom_path); -- 2.34.1
[PATCH 2/2] xl: Add escape character argument to xl console
From: Peter Hoyes Add -e argument to xl console and pass to new escape_character argument of libxl_console_exec. In libxl_console_exec, there are currently two call sites to execl, which uses varargs, in order to support optionally passing 'start-notify-fd' to the console client. In order to support passing the 'escape' argument optionally too, refactor to instead have a single call site to execv, which has the same behavior but takes an array of arguments. If -e is not specified, --escape is not passed to the console client and the existing value (^]) is used as a default. Issue-Id: SCM-4958 Signed-off-by: Peter Hoyes Change-Id: I80ffbb7fecc63e023ec1c7f1e4ad7979ccf8da44 --- tools/include/libxl.h| 8 ++-- tools/libs/light/libxl_console.c | 30 ++ tools/xl/xl_cmdtable.c | 3 ++- tools/xl/xl_console.c| 10 +++--- tools/xl/xl_vmcontrol.c | 2 +- 5 files changed, 38 insertions(+), 15 deletions(-) diff --git a/tools/include/libxl.h b/tools/include/libxl.h index cac641a7eb..c513c39483 100644 --- a/tools/include/libxl.h +++ b/tools/include/libxl.h @@ -1958,7 +1958,8 @@ int libxl_vncviewer_exec(libxl_ctx *ctx, uint32_t domid, int autopass); * the caller that it has connected to the guest console. */ int libxl_console_exec(libxl_ctx *ctx, uint32_t domid, int cons_num, - libxl_console_type type, int notify_fd); + libxl_console_type type, int notify_fd, + char* escape_character); /* libxl_primary_console_exec finds the domid and console number * corresponding to the primary console of the given vm, then calls * libxl_console_exec with the right arguments (domid might be different @@ -1968,9 +1969,12 @@ int libxl_console_exec(libxl_ctx *ctx, uint32_t domid, int cons_num, * guests using pygrub. * If notify_fd is not -1, xenconsole will write 0x00 to it to nofity * the caller that it has connected to the guest console. + * If escape_character is not NULL, the provided value is used to exit + * the guest console. 
*/ int libxl_primary_console_exec(libxl_ctx *ctx, uint32_t domid_vm, - int notify_fd); + int notify_fd, + char* escape_character); #if defined(LIBXL_API_VERSION) && LIBXL_API_VERSION < 0x040800 diff --git a/tools/libs/light/libxl_console.c b/tools/libs/light/libxl_console.c index f497be141b..0b7293fe71 100644 --- a/tools/libs/light/libxl_console.c +++ b/tools/libs/light/libxl_console.c @@ -52,7 +52,8 @@ out: } int libxl_console_exec(libxl_ctx *ctx, uint32_t domid, int cons_num, - libxl_console_type type, int notify_fd) + libxl_console_type type, int notify_fd, + char* escape_character) { GC_INIT(ctx); char *p = GCSPRINTF("%s/xenconsole", libxl__private_bindir_path()); @@ -75,15 +76,26 @@ int libxl_console_exec(libxl_ctx *ctx, uint32_t domid, int cons_num, goto out; } +char *args[] = { +p, domid_s, "--num", cons_num_s, "--type", cons_type_s, +NULL, NULL, NULL, NULL, // start-notify-fd, escape +NULL, // list terminator - do not use +}; +char **args_extra = args + 6; + if (notify_fd != -1) { notify_fd_s = GCSPRINTF("%d", notify_fd); -execl(p, p, domid_s, "--num", cons_num_s, "--type", cons_type_s, - "--start-notify-fd", notify_fd_s, (void *)NULL); -} else { -execl(p, p, domid_s, "--num", cons_num_s, "--type", cons_type_s, - (void *)NULL); +*args_extra++ = "--start-notify-fd"; +*args_extra++ = notify_fd_s; } +if (escape_character) { +*args_extra++ = "--escape"; +*args_extra++ = escape_character; +} + +execv(p, args); + out: GC_FREE; return ERROR_FAIL; @@ -156,7 +168,8 @@ out: return rc; } -int libxl_primary_console_exec(libxl_ctx *ctx, uint32_t domid_vm, int notify_fd) +int libxl_primary_console_exec(libxl_ctx *ctx, uint32_t domid_vm, int notify_fd, + char* escape_character) { uint32_t domid; int cons_num; @@ -165,7 +178,8 @@ int libxl_primary_console_exec(libxl_ctx *ctx, uint32_t domid_vm, int notify_fd) rc = libxl__primary_console_find(ctx, domid_vm, &domid, &cons_num, &type); if ( rc ) return rc; -return libxl_console_exec(ctx, domid, cons_num, type, notify_fd); 
+return libxl_console_exec(ctx, domid, cons_num, type, notify_fd, + escape_character); } int libxl_primary_console_get_tty(libxl_ctx *ctx, uint32_t domid_vm, diff --git a/tools/xl/xl_cmdtable.c b/tools/xl/xl_cmdtable.c index ccf4d83584..67604e9536 100644 --- a/tools/xl/xl_cmdtable.c +++ b/tools/xl/xl_cmdtable.c @@ -141,7 +141,8 @@ const struct cmd_spec cmd_table[] = { "Attach to domain's console", "[options] \n" "
Re: [PATCH] xen: Append a newline character to panic() where missing
> On 14 Jun 2023, at 08:30, Michal Orzel wrote: > > Missing newline is inconsistent with the rest of the callers, since > panic() expects it. > > Signed-off-by: Michal Orzel Reviewed-by: Luca Fancellu > --- > xen/arch/arm/bootfdt.c| 2 +- > xen/arch/arm/domain_build.c | 6 +++--- > xen/arch/x86/cpu/microcode/core.c | 2 +- > 3 files changed, 5 insertions(+), 5 deletions(-) > > diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c > index b6f92a174f5f..2673ad17a1e1 100644 > --- a/xen/arch/arm/bootfdt.c > +++ b/xen/arch/arm/bootfdt.c > @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const void > *fdt, int node, > size_cells, data); > > if ( rc == -ENOSPC ) > -panic("Max number of supported reserved-memory regions reached."); > +panic("Max number of supported reserved-memory regions reached.\n"); > else if ( rc != -ENOENT ) > return rc; > return 0; > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > index 579bc8194fed..d0d6be922db1 100644 > --- a/xen/arch/arm/domain_build.c > +++ b/xen/arch/arm/domain_build.c > @@ -74,7 +74,7 @@ int __init parse_arch_dom0_param(const char *s, const char > *e) > > return 0; > #else > -panic("'sve' property found, but CONFIG_ARM64_SVE not selected"); > +panic("'sve' property found, but CONFIG_ARM64_SVE not selected\n"); Sorry about that! 
I’ve missed it > #endif > } > > @@ -697,7 +697,7 @@ static void __init allocate_static_memory(struct domain > *d, > return; > > fail: > -panic("Failed to allocate requested static memory for domain %pd.", d); > +panic("Failed to allocate requested static memory for domain %pd.\n", d); > } > > /* > @@ -769,7 +769,7 @@ static void __init assign_static_memory_11(struct domain > *d, > return; > > fail: > -panic("Failed to assign requested static memory for direct-map domain > %pd.", > +panic("Failed to assign requested static memory for direct-map domain > %pd.\n", > d); > } > > diff --git a/xen/arch/x86/cpu/microcode/core.c > b/xen/arch/x86/cpu/microcode/core.c > index e65af4b82ea3..c3fee629063e 100644 > --- a/xen/arch/x86/cpu/microcode/core.c > +++ b/xen/arch/x86/cpu/microcode/core.c > @@ -524,7 +524,7 @@ static int control_thread_fn(const struct microcode_patch > *patch) > */ > if ( wait_for_condition(wait_cpu_callout, (done + 1), > MICROCODE_UPDATE_TIMEOUT_US) ) > -panic("Timeout when finished updating microcode (finished > %u/%u)", > +panic("Timeout when finished updating microcode (finished > %u/%u)\n", > done, nr_cores); > > /* Print warning message once if long time is spent here */ > > base-commit: 2f69ef96801f0d2b9646abf6396e60f99c56e3a0 > -- > 2.25.1 > >
Re: [PATCH] xen: Append a newline character to panic() where missing
On 14.06.2023 09:45, Luca Fancellu wrote: >> On 14 Jun 2023, at 08:30, Michal Orzel wrote: >> >> Missing newline is inconsistent with the rest of the callers, since >> panic() expects it. >> >> Signed-off-by: Michal Orzel > > Reviewed-by: Luca Fancellu Acked-by: Jan Beulich
Re: [PATCH] xen: Append a newline character to panic() where missing
On 14/06/2023 8:30 am, Michal Orzel wrote: > Missing newline is inconsistent with the rest of the callers, since > panic() expects it. > > Signed-off-by: Michal Orzel Acked-by: Andrew Cooper although... > --- > xen/arch/arm/bootfdt.c| 2 +- > xen/arch/arm/domain_build.c | 6 +++--- > xen/arch/x86/cpu/microcode/core.c | 2 +- > 3 files changed, 5 insertions(+), 5 deletions(-) > > diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c > index b6f92a174f5f..2673ad17a1e1 100644 > --- a/xen/arch/arm/bootfdt.c > +++ b/xen/arch/arm/bootfdt.c > @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const void > *fdt, int node, > size_cells, data); > > if ( rc == -ENOSPC ) > -panic("Max number of supported reserved-memory regions reached."); > +panic("Max number of supported reserved-memory regions reached.\n"); Trailing punctuation like . or ! is useless. Most messages don't have them, and it just takes up space in .rodata, the console ring, and time on the UART. I'd recommend dropping the ones you modify, and/or cleaning it up more widely. ~Andrew
Re: [PATCH] xen: Append a newline character to panic() where missing
On 14/06/2023 10:04, Andrew Cooper wrote: > > > On 14/06/2023 8:30 am, Michal Orzel wrote: >> Missing newline is inconsistent with the rest of the callers, since >> panic() expects it. >> >> Signed-off-by: Michal Orzel > > Acked-by: Andrew Cooper > > although... > >> --- >> xen/arch/arm/bootfdt.c| 2 +- >> xen/arch/arm/domain_build.c | 6 +++--- >> xen/arch/x86/cpu/microcode/core.c | 2 +- >> 3 files changed, 5 insertions(+), 5 deletions(-) >> >> diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c >> index b6f92a174f5f..2673ad17a1e1 100644 >> --- a/xen/arch/arm/bootfdt.c >> +++ b/xen/arch/arm/bootfdt.c >> @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const >> void *fdt, int node, >> size_cells, data); >> >> if ( rc == -ENOSPC ) >> -panic("Max number of supported reserved-memory regions reached."); >> +panic("Max number of supported reserved-memory regions reached.\n"); > > Trailing punctuation like . or ! is useless. Most messages don't have > them, and it just takes up space in .rodata, the console ring, and time > on the UART. > > I'd recommend dropping the ones you modify, and/or cleaning it up more > widely. I will keep in mind to do that in global scope in the next patch. We also have quite a lot of printk() with trailing punctuation. ~Michal
[PATCH] iommu/amd-vi: adjust _amd_iommu_flush_pages() to handle pseudo-domids
When the passed domain is DomIO iterate over the list of DomIO assigned devices and flush each pseudo-domid found. invalidate_all_domain_pages() does call amd_iommu_flush_all_pages() with DomIO as a parameter, and hence the underlying _amd_iommu_flush_pages() implementation must be capable of flushing all pseudo-domids used by the quarantine domain logic. While there fix invalidate_all_domain_pages() to only attempt to flush the domains that have IOMMU enabled, otherwise the flush is pointless. Fixes: 14dd241aad8a ('IOMMU/x86: use per-device page tables for quarantining') Signed-off-by: Roger Pau Monné --- Only build tested, as I don't have an AMD system that I can do suspend/resume on. --- xen/drivers/passthrough/amd/iommu_cmd.c | 29 xen/drivers/passthrough/amd/iommu_init.c | 4 +++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c index 40ddf366bb4d..ff55e3b88ae6 100644 --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -330,13 +330,34 @@ static void _amd_iommu_flush_pages(struct domain *d, daddr_t daddr, unsigned int order) { struct amd_iommu *iommu; -unsigned int dom_id = d->domain_id; /* send INVALIDATE_IOMMU_PAGES command */ -for_each_amd_iommu ( iommu ) +if ( d != dom_io ) { -invalidate_iommu_pages(iommu, daddr, dom_id, order); -flush_command_buffer(iommu, 0); +for_each_amd_iommu ( iommu ) +{ +invalidate_iommu_pages(iommu, daddr, d->domain_id, order); +flush_command_buffer(iommu, 0); +} +} +else +{ +const struct pci_dev *pdev; + +for_each_pdev(dom_io, pdev) +if ( pdev->arch.pseudo_domid != DOMID_INVALID ) +{ +iommu = find_iommu_for_device(pdev->sbdf.seg, pdev->sbdf.bdf); +if ( !iommu ) +{ +ASSERT_UNREACHABLE(); +continue; +} + +invalidate_iommu_pages(iommu, daddr, pdev->arch.pseudo_domid, + order); +flush_command_buffer(iommu, 0); +} } if ( ats_enabled ) diff --git a/xen/drivers/passthrough/amd/iommu_init.c 
b/xen/drivers/passthrough/amd/iommu_init.c index 7dbd7e7d094a..af6713d2fc02 100644 --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -1532,8 +1532,10 @@ int __init amd_iommu_init_late(void) static void invalidate_all_domain_pages(void) { struct domain *d; + for_each_domain( d ) -amd_iommu_flush_all_pages(d); +if ( is_iommu_enabled(d) ) +amd_iommu_flush_all_pages(d); } static int cf_check _invalidate_all_devices( -- 2.40.0
compat code lacks support for __attribute__
I would like to share code between the hypervisor and xenalyze, so that every definition comes from a single place and is not duplicated in xen and xenalyze. This works as long as simple constructs like struct x {}; are used. Depending on the data to be stored into the trace buffer, having support for __packed may reduce the amount of data that need to be copied. The tooling currently converts ___attribute__ into compat___attribute__. Was support for __attribute__ intentionally left out, or was there just no usecase for it? Olaf +++ b/xen/include/public/trace.h @@ -80,6 +80,14 @@ #define TRC_LOST_RECORDS(TRC_GEN + 1) #define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) #define TRC_TRACE_CPU_CHANGE(TRC_GEN + 3) +#define TRC_a (TRC_GEN + 123) +struct __attribute__((__packed__)) trc_a { +unsigned a; +}; +#define TRC_b (TRC_GEN + 321) +typedef struct __attribute__((__packed__)) trc_b { +unsigned b; +} trc_b_t; #define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) #define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2) pgp8GlNlLJgFF.pgp Description: Digitale Signatur von OpenPGP
Re: [PATCH] xen: Append a newline character to panic() where missing
On 14/06/2023 09:09, Michal Orzel wrote: On 14/06/2023 10:04, Andrew Cooper wrote: On 14/06/2023 8:30 am, Michal Orzel wrote: Missing newline is inconsistent with the rest of the callers, since panic() expects it. Signed-off-by: Michal Orzel Acked-by: Andrew Cooper although... --- xen/arch/arm/bootfdt.c| 2 +- xen/arch/arm/domain_build.c | 6 +++--- xen/arch/x86/cpu/microcode/core.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c index b6f92a174f5f..2673ad17a1e1 100644 --- a/xen/arch/arm/bootfdt.c +++ b/xen/arch/arm/bootfdt.c @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const void *fdt, int node, size_cells, data); if ( rc == -ENOSPC ) -panic("Max number of supported reserved-memory regions reached."); +panic("Max number of supported reserved-memory regions reached.\n"); Trailing punctuation like . or ! is useless. Most messages don't have them, and it just takes up space in .rodata, the console ring, and time on the UART. I'd recommend dropping the ones you modify, and/or cleaning it up more widely. I will keep in mind to do that in global scope in the next patch. We also have quite a lot of printk() with trailing punctuation. This is quite a bit of churn and I am unconvinced this is necessary. Also, if the others want such style, then this should be agreed on in the CODING_STYLE first. Otherwise, I am afraid this is not something I will enforce because I see limited value. Cheers, -- Julien Grall
Re: [PATCH] xen: Append a newline character to panic() where missing
On 14/06/2023 11:02, Julien Grall wrote: > > > On 14/06/2023 09:09, Michal Orzel wrote: >> >> >> On 14/06/2023 10:04, Andrew Cooper wrote: >>> >>> >>> On 14/06/2023 8:30 am, Michal Orzel wrote: Missing newline is inconsistent with the rest of the callers, since panic() expects it. Signed-off-by: Michal Orzel >>> >>> Acked-by: Andrew Cooper >>> >>> although... >>> --- xen/arch/arm/bootfdt.c| 2 +- xen/arch/arm/domain_build.c | 6 +++--- xen/arch/x86/cpu/microcode/core.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c index b6f92a174f5f..2673ad17a1e1 100644 --- a/xen/arch/arm/bootfdt.c +++ b/xen/arch/arm/bootfdt.c @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const void *fdt, int node, size_cells, data); if ( rc == -ENOSPC ) -panic("Max number of supported reserved-memory regions reached."); +panic("Max number of supported reserved-memory regions reached.\n"); >>> >>> Trailing punctuation like . or ! is useless. Most messages don't have >>> them, and it just takes up space in .rodata, the console ring, and time >>> on the UART. >>> >>> I'd recommend dropping the ones you modify, and/or cleaning it up more >>> widely. >> I will keep in mind to do that in global scope in the next patch. >> We also have quite a lot of printk() with trailing punctuation. > > This is quite a bit of churn and I am unconvinced this is necessary. > Also, if the others want such style, then this should be agreed on in > the CODING_STYLE first. Otherwise, I am afraid this is not something I > will enforce because I see limited value. > I then suggest to take this patch as is if you are also happy with it like others. ~Michal
Re: [PATCH] xen/evtchn: Purge ERROR_EXIT{,_DOM}()
Hi Jan, On 14/06/2023 07:59, Jan Beulich wrote: On 13.06.2023 18:22, Andrew Cooper wrote: These are disliked specifically by MISRA, but they also interfere with code legibility by hiding control flow. Expand and drop them. * Rearrange the order of actions to write into rc, then render rc in the gdprintk(). * Drop redundant "rc = rc" assignments * Switch to using %pd for rendering domains With this change, ... No functional change. Resulting binary is identical. ... I doubt this. Even .text being entirely identical would be pure luck, as message offsets might change slightly depending on how much padding the compiler inserts between them. Furthermore I wonder whether ... @@ -336,7 +319,11 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port) port = rc = evtchn_get_port(d, port); if ( rc < 0 ) -ERROR_EXIT(rc); +{ +gdprintk(XENLOG_WARNING, "EVTCHNOP failure: error %d\n", rc); +goto out; +} ... it wouldn't make sense to mention the actual operation that failed, now that each function has its own message(s). In turn I question the usefulness of "error" in the message text. Then again I wonder whether it isn't time to purge these gdprintk()s altogether. Surely they served a purpose for bringing up initial Linux and mini-os and alike, but that's been two decades ago now. There are still ports of new OSes on Xen (e.g. QNX, FreeRTOS...) happening from time to time. Also, having messages in error paths is something I wish most of Xen had. Quite often when I end up debugging a hypercall, I will add printks(). So I am not in favor of removing the gdprintk()s. Cheers, -- Julien Grall
Re: [PATCH] xen: Append a newline character to panic() where missing
On 14/06/2023 10:06, Michal Orzel wrote: On 14/06/2023 11:02, Julien Grall wrote: On 14/06/2023 09:09, Michal Orzel wrote: On 14/06/2023 10:04, Andrew Cooper wrote: On 14/06/2023 8:30 am, Michal Orzel wrote: Missing newline is inconsistent with the rest of the callers, since panic() expects it. Signed-off-by: Michal Orzel Acked-by: Andrew Cooper although... --- xen/arch/arm/bootfdt.c| 2 +- xen/arch/arm/domain_build.c | 6 +++--- xen/arch/x86/cpu/microcode/core.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c index b6f92a174f5f..2673ad17a1e1 100644 --- a/xen/arch/arm/bootfdt.c +++ b/xen/arch/arm/bootfdt.c @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const void *fdt, int node, size_cells, data); if ( rc == -ENOSPC ) -panic("Max number of supported reserved-memory regions reached."); +panic("Max number of supported reserved-memory regions reached.\n"); Trailing punctuation like . or ! is useless. Most messages don't have them, and it just takes up space in .rodata, the console ring, and time on the UART. I'd recommend dropping the ones you modify, and/or cleaning it up more widely. I will keep in mind to do that in global scope in the next patch. We also have quite a lot of printk() with trailing punctuation. This is quite a bit of churn and I am unconvinced this is necessary. Also, if the others want such style, then this should be agreed on in the CODING_STYLE first. Otherwise, I am afraid this is not something I will enforce because I see limited value. I then suggest to take this patch as is if you are also happy with it like others. The patch looks fine. I will commit it a bit later just to give a chance to Bertrand/Stefano to object. Cheers, -- Julien Grall
Re: [PATCH] xen/evtchn: Purge ERROR_EXIT{,_DOM}()
On 14/06/2023 7:52 am, Jan Beulich wrote: > On 13.06.2023 21:47, Roberto Bagnara wrote: >> On 13/06/23 19:45, Andrew Cooper wrote: >>> On 13/06/2023 6:39 pm, Julien Grall wrote: Hi, On 13/06/2023 17:22, Andrew Cooper wrote: > These are disliked specifically by MISRA, but they also interfere > with code Please explicitly name the rule. >>> I can't remember it off the top of my head. >>> >>> Stefano/Bertrand? >> Rule 2.1 > That's about unreachable code, but inside the constructs there's nothing > that's unreachable afaics. Plus expanding "manually" them wouldn't change > reachability, would it? I bet it's complaining about the while() after the goto. I can see why things end up caring - because this violation can only be spotted in the fully-preprocessed source where the macro-ness has gone away, and *then* applying blanket rules. Which comes back to the original point I made on the call yesterday that do{}while(0) correctness for macros is far more important than some, honestly suspect, claim about the resulting code being somehow "better" without the macro safety. ~Andrew
Re: [PATCH v4 27/34] nios2: Convert __pte_free_tlb() to use ptdescs
Hi Dinh, On Wed, Jun 14, 2023 at 12:17 AM Dinh Nguyen wrote: > On 6/12/23 16:04, Vishal Moola (Oracle) wrote: > > Part of the conversions to replace pgtable constructor/destructors with > > ptdesc equivalents. > > > > Signed-off-by: Vishal Moola (Oracle) > > --- > > arch/nios2/include/asm/pgalloc.h | 8 > > 1 file changed, 4 insertions(+), 4 deletions(-) > > > > diff --git a/arch/nios2/include/asm/pgalloc.h > > b/arch/nios2/include/asm/pgalloc.h > > index ecd1657bb2ce..ce6bb8e74271 100644 > > --- a/arch/nios2/include/asm/pgalloc.h > > +++ b/arch/nios2/include/asm/pgalloc.h > > @@ -28,10 +28,10 @@ static inline void pmd_populate(struct mm_struct *mm, > > pmd_t *pmd, > > > > extern pgd_t *pgd_alloc(struct mm_struct *mm); > > > > -#define __pte_free_tlb(tlb, pte, addr) \ > > - do {\ > > - pgtable_pte_page_dtor(pte); \ > > - tlb_remove_page((tlb), (pte)); \ > > +#define __pte_free_tlb(tlb, pte, addr) > > \ > > + do {\ > > + pagetable_pte_dtor(page_ptdesc(pte)); \ > > + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ > > } while (0) > > > > #endif /* _ASM_NIOS2_PGALLOC_H */ > > Applied! I don't think you can just apply this patch, as the new functions were only introduced in [PATCH v4 05/34] of this series. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Re: [PATCH] xen/evtchn: Purge ERROR_EXIT{,_DOM}()
On 14.06.2023 11:21, Andrew Cooper wrote: > On 14/06/2023 7:52 am, Jan Beulich wrote: >> On 13.06.2023 21:47, Roberto Bagnara wrote: >>> On 13/06/23 19:45, Andrew Cooper wrote: On 13/06/2023 6:39 pm, Julien Grall wrote: > Hi, > > On 13/06/2023 17:22, Andrew Cooper wrote: >> These are disliked specifically by MISRA, but they also interfere >> with code > Please explicitly name the rule. I can't remember it off the top of my head. Stefano/Bertrand? >>> Rule 2.1 >> That's about unreachable code, but inside the constructs there's nothing >> that's unreachable afaics. Plus expanding "manually" them wouldn't change >> reachability, would it? > > I bet it's complaining about the while() after the goto. > > I can see why things end up caring - because this violation can only be > spotted in the fully-preprocessed source where the macro-ness has gone > away, and *then* applying blanket rules. Hmm, perhaps. > Which comes back to the original point I made on the call yesterday that > do{}while(0) correctness for macros is far more important than some, > honestly suspect, claim about the resulting code being somehow "better" > without the macro safety. Even further I would claim that the while(0) part of the construct isn't unreachable code simply because it doesn't result in any code being generated. But of course "code" here may mean "source code", not the resulting binary representation. Jan
[PATCH] xen/arm: Remove stray semicolon at VREG_REG_HELPERS/TLB_HELPER* callers
This is inconsistent with the rest of the code where macros are used to define functions, as it results in an empty declaration (i.e. semicolon with nothing before it) after function definition. This is also not allowed by C99. Take the opportunity to undefine TLB_HELPER* macros after last use. Signed-off-by: Michal Orzel --- Discussion: https://lore.kernel.org/xen-devel/17c59d5c-795e-4591-a7c9-a4c5179bf...@arm.com/ Other empty declarations appear at callers of TYPE_SAFE and Linux module macros like EXPORT_SYMBOL for which we need some sort of agreement. --- xen/arch/arm/include/asm/arm32/flushtlb.h | 12 +++- xen/arch/arm/include/asm/arm64/flushtlb.h | 17 ++--- xen/arch/arm/include/asm/vreg.h | 4 ++-- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/xen/arch/arm/include/asm/arm32/flushtlb.h b/xen/arch/arm/include/asm/arm32/flushtlb.h index 7ae6a12f8155..22ee3b317b4d 100644 --- a/xen/arch/arm/include/asm/arm32/flushtlb.h +++ b/xen/arch/arm/include/asm/arm32/flushtlb.h @@ -29,19 +29,21 @@ static inline void name(void) \ } /* Flush local TLBs, current VMID only */ -TLB_HELPER(flush_guest_tlb_local, TLBIALL, nsh); +TLB_HELPER(flush_guest_tlb_local, TLBIALL, nsh) /* Flush inner shareable TLBs, current VMID only */ -TLB_HELPER(flush_guest_tlb, TLBIALLIS, ish); +TLB_HELPER(flush_guest_tlb, TLBIALLIS, ish) /* Flush local TLBs, all VMIDs, non-hypervisor mode */ -TLB_HELPER(flush_all_guests_tlb_local, TLBIALLNSNH, nsh); +TLB_HELPER(flush_all_guests_tlb_local, TLBIALLNSNH, nsh) /* Flush innershareable TLBs, all VMIDs, non-hypervisor mode */ -TLB_HELPER(flush_all_guests_tlb, TLBIALLNSNHIS, ish); +TLB_HELPER(flush_all_guests_tlb, TLBIALLNSNHIS, ish) /* Flush all hypervisor mappings from the TLB of the local processor. */ -TLB_HELPER(flush_xen_tlb_local, TLBIALLH, nsh); +TLB_HELPER(flush_xen_tlb_local, TLBIALLH, nsh) + +#undef TLB_HELPER /* Flush TLB of local processor for address va. 
*/ static inline void __flush_xen_tlb_one_local(vaddr_t va) diff --git a/xen/arch/arm/include/asm/arm64/flushtlb.h b/xen/arch/arm/include/asm/arm64/flushtlb.h index 3a9092b814a9..56c6fc763b56 100644 --- a/xen/arch/arm/include/asm/arm64/flushtlb.h +++ b/xen/arch/arm/include/asm/arm64/flushtlb.h @@ -67,25 +67,28 @@ static inline void name(vaddr_t va) \ } /* Flush local TLBs, current VMID only. */ -TLB_HELPER(flush_guest_tlb_local, vmalls12e1, nsh); +TLB_HELPER(flush_guest_tlb_local, vmalls12e1, nsh) /* Flush innershareable TLBs, current VMID only */ -TLB_HELPER(flush_guest_tlb, vmalls12e1is, ish); +TLB_HELPER(flush_guest_tlb, vmalls12e1is, ish) /* Flush local TLBs, all VMIDs, non-hypervisor mode */ -TLB_HELPER(flush_all_guests_tlb_local, alle1, nsh); +TLB_HELPER(flush_all_guests_tlb_local, alle1, nsh) /* Flush innershareable TLBs, all VMIDs, non-hypervisor mode */ -TLB_HELPER(flush_all_guests_tlb, alle1is, ish); +TLB_HELPER(flush_all_guests_tlb, alle1is, ish) /* Flush all hypervisor mappings from the TLB of the local processor. */ -TLB_HELPER(flush_xen_tlb_local, alle2, nsh); +TLB_HELPER(flush_xen_tlb_local, alle2, nsh) /* Flush TLB of local processor for address va. */ -TLB_HELPER_VA(__flush_xen_tlb_one_local, vae2); +TLB_HELPER_VA(__flush_xen_tlb_one_local, vae2) /* Flush TLB of all processors in the inner-shareable domain for address va. 
*/ -TLB_HELPER_VA(__flush_xen_tlb_one, vae2is); +TLB_HELPER_VA(__flush_xen_tlb_one, vae2is) + +#undef TLB_HELPER +#undef TLB_HELPER_VA #endif /* __ASM_ARM_ARM64_FLUSHTLB_H__ */ /* diff --git a/xen/arch/arm/include/asm/vreg.h b/xen/arch/arm/include/asm/vreg.h index d92450017bc4..bf945eebbde4 100644 --- a/xen/arch/arm/include/asm/vreg.h +++ b/xen/arch/arm/include/asm/vreg.h @@ -140,8 +140,8 @@ static inline void vreg_reg##sz##_clearbits(uint##sz##_t *reg, \ *reg &= ~(((uint##sz##_t)bits & mask) << shift);\ } -VREG_REG_HELPERS(64, 0x7); -VREG_REG_HELPERS(32, 0x3); +VREG_REG_HELPERS(64, 0x7) +VREG_REG_HELPERS(32, 0x3) #undef VREG_REG_HELPERS base-commit: 2f69ef96801f0d2b9646abf6396e60f99c56e3a0 -- 2.25.1
Re: [PATCH v1 5/8] xen/riscv: introduce identity mapping
On Mon, 2023-06-12 at 15:48 +0200, Jan Beulich wrote: > On 06.06.2023 21:55, Oleksii Kurochko wrote: > > The way how switch to virtual address was implemented in the > > commit e66003e7be ("xen/riscv: introduce setup_initial_pages") > > wasn't safe enough so identity mapping was introduced and > > used. > > I don't think this is sufficient as a description. You want to make > clear what the "not safe enough" is, and you also want to go into > more detail as to the solution chosen. I'm particularly puzzled that > you map just two singular pages ... I'll update the description. I map two pages as it is likely to be enough to switch from 1:1 mapping world. My point is that we need 1:1 mapping only for a few instructions which are located in start() [ in .text.header section ]: li t0, XEN_VIRT_START la t1, start sub t1, t1, t0 /* Calculate proper VA after jump from 1:1 mapping */ la t0, .L_primary_switched sub t0, t0, t1 /* Jump from 1:1 mapping world */ jr t0 And it is needed to map 1:1 cpu0_boot_stack so as not to fault when executing the epilogue of the enable_mmu() function as it accesses a value on the stack: c1b0: 6422ld s0,8(sp) c1b2: 0141addisp,sp,16 c1b4: 8082ret > > > @@ -35,8 +40,10 @@ static unsigned long phys_offset; > > * > > * It might be needed one more page table in case when Xen load > > address > > * isn't 2 MB aligned. > > + * > > + * 3 additional page tables are needed for identity mapping. > > */ > > -#define PGTBL_INITIAL_COUNT ((CONFIG_PAGING_LEVELS - 1) + 1) > > +#define PGTBL_INITIAL_COUNT ((CONFIG_PAGING_LEVELS - 1) + 1 + 3) > > What is this 3 coming from? It feels like the value should (again) > somehow depend on CONFIG_PAGING_LEVELS. 3 is too much in the current case. It should be 2 more. The linker address is 0xFFFC thereby mapping the linker to load addresses we need 3 pages ( with the condition that the size of Xen won't be larger than 2 MB ) and 1 page in the case when the Xen load address isn't 2 MB aligned. 
We need 2 more page tables because Xen is loaded to address 0x8020 by OpenSBI and the load address isn't equal to the linker address so we need additional 2 pages as the L2 table we already allocated when mapping the linker to load addresses. And it looks like 2 is enough for Sv48, Sv57 as L4, L3 and L2 pagetables will be already allocated during mapping the linker to load addresses. And it only will be too much for Sv32 ( which has only L1, L0 in general ) but I am not sure that anyone cares about Sv32. > > > @@ -108,16 +116,18 @@ static void __init > > setup_initial_mapping(struct mmu_desc *mmu_desc, > > { > > unsigned long paddr = (page_addr - map_start) + > > pa_start; > > unsigned int permissions = PTE_LEAF_DEFAULT; > > + unsigned long addr = (is_identity_mapping) ? > > Nit: No need for parentheses here. Thanks. > > > + page_addr : > > LINK_TO_LOAD(page_addr); > > As a remark, while we want binary operators at the end of lines when > wrapping, we usually do things differently for the ternary operator: > Either > > unsigned long addr = is_identity_mapping > ? page_addr : > LINK_TO_LOAD(page_addr); > > or > > unsigned long addr = is_identity_mapping > ? page_addr > : LINK_TO_LOAD(page_addr); It looks better. Thanks. > > . > > > @@ -232,22 +242,27 @@ void __init setup_initial_pagetables(void) > > linker_start, > > linker_end, > > load_start); > > + > > + if ( linker_start == load_start ) > > + return; > > + > > + setup_initial_mapping(&mmu_desc, > > + load_start, > > + load_start + PAGE_SIZE, > > + load_start); > > + > > + setup_initial_mapping(&mmu_desc, > > + (unsigned long)cpu0_boot_stack, > > + (unsigned long)cpu0_boot_stack + > > PAGE_SIZE, > > Shouldn't this be STACK_SIZE (and then also be prepared for > STACK_SIZE > PAGE_SIZE)? Yes, it will be better to use STACK_SIZE but for 1:1 mapping it will be enough PAGE_SIZE as I mentioned above this only need for epilogue of enable_mmu() function. 
> > > + (unsigned long)cpu0_boot_stack); > > } > > > > -void __init noreturn noinline enable_mmu() > > +/* > > + * enable_mmu() can't be __init because __init section isn't part > > of identity > > + * mapping so it will cause an issue after MMU will be enabled. > > + */ > > As hinted at above already - perhaps the identity mapping wants to be > large
Re: compat code lacks support for __attribute__
On 14.06.2023 10:43, Olaf Hering wrote: > I would like to share code between the hypervisor and xenalyze, so that > every definition comes from a single place and is not duplicated in xen > and xenalyze. This works as long as simple constructs like struct x {}; > are used. Depending on the data to be stored into the trace buffer, having > support for __packed may reduce the amount of data that need to be copied. > > The tooling currently converts ___attribute__ into compat___attribute__. > Was support for __attribute__ intentionally left out, or was there just > no usecase for it? It was intentionally left out, as the public headers that are subject to compat translation are supposed to not use any compiler extensions. See also our checking that these headers build in plain ANSI C mode (which of course doesn't catch all possible cases, as macros would need actually using in order to spot issues). However, if you're after adding packed attributes, and if you're meaning to only communicate between Xen and the tool stack, then the requirement above doesn't exist. Yet then I would also wonder whether you need any compat translation in the first place. Those packed structures would better be arch-/bitness-agnostic, wouldn't they? So perhaps we could arrange for your additions to be excluded from the compat translation machinery? Another option to consider might be to use #pragma pack instead of packed attributes. (I can't say off the top of my head whether such #pragma-s would survive the translation.) Jan
Re: [PATCH] xen: Append a newline character to panic() where missing
Hi Michal, > On 14 Jun 2023, at 09:30, Michal Orzel wrote: > > Missing newline is inconsistent with the rest of the callers, since > panic() expects it. > > Signed-off-by: Michal Orzel Reviewed-by: Bertrand Marquis And I agree with Julien: trailing punctuation is not an issue and I would definitely not require you to fix it. Cheers Bertrand > --- > xen/arch/arm/bootfdt.c| 2 +- > xen/arch/arm/domain_build.c | 6 +++--- > xen/arch/x86/cpu/microcode/core.c | 2 +- > 3 files changed, 5 insertions(+), 5 deletions(-) > > diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c > index b6f92a174f5f..2673ad17a1e1 100644 > --- a/xen/arch/arm/bootfdt.c > +++ b/xen/arch/arm/bootfdt.c > @@ -225,7 +225,7 @@ static int __init process_reserved_memory_node(const void > *fdt, int node, > size_cells, data); > > if ( rc == -ENOSPC ) > -panic("Max number of supported reserved-memory regions reached."); > +panic("Max number of supported reserved-memory regions reached.\n"); > else if ( rc != -ENOENT ) > return rc; > return 0; > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > index 579bc8194fed..d0d6be922db1 100644 > --- a/xen/arch/arm/domain_build.c > +++ b/xen/arch/arm/domain_build.c > @@ -74,7 +74,7 @@ int __init parse_arch_dom0_param(const char *s, const char > *e) > > return 0; > #else > -panic("'sve' property found, but CONFIG_ARM64_SVE not selected"); > +panic("'sve' property found, but CONFIG_ARM64_SVE not selected\n"); > #endif > } > > @@ -697,7 +697,7 @@ static void __init allocate_static_memory(struct domain > *d, > return; > > fail: > -panic("Failed to allocate requested static memory for domain %pd.", d); > +panic("Failed to allocate requested static memory for domain %pd.\n", d); > } > > /* > @@ -769,7 +769,7 @@ static void __init assign_static_memory_11(struct domain > *d, > return; > > fail: > -panic("Failed to assign requested static memory for direct-map domain > %pd.", > +panic("Failed to assign requested static memory for direct-map domain 
> %pd.\n", > d); > } > > diff --git a/xen/arch/x86/cpu/microcode/core.c > b/xen/arch/x86/cpu/microcode/core.c > index e65af4b82ea3..c3fee629063e 100644 > --- a/xen/arch/x86/cpu/microcode/core.c > +++ b/xen/arch/x86/cpu/microcode/core.c > @@ -524,7 +524,7 @@ static int control_thread_fn(const struct microcode_patch > *patch) > */ > if ( wait_for_condition(wait_cpu_callout, (done + 1), > MICROCODE_UPDATE_TIMEOUT_US) ) > -panic("Timeout when finished updating microcode (finished > %u/%u)", > +panic("Timeout when finished updating microcode (finished > %u/%u)\n", > done, nr_cores); > > /* Print warning message once if long time is spent here */ > > base-commit: 2f69ef96801f0d2b9646abf6396e60f99c56e3a0 > -- > 2.25.1 >
Re: [PATCH] xen/arm: Remove stray semicolon at VREG_REG_HELPERS/TLB_HELPER* callers
On 14.06.2023 11:41, Michal Orzel wrote: > This is inconsistent with the rest of the code where macros are used > to define functions, as it results in an empty declaration (i.e. > semicolon with nothing before it) after function definition. This is also > not allowed by C99. > > Take the opportunity to undefine TLB_HELPER* macros after last use. > > Signed-off-by: Michal Orzel > --- > Discussion: > https://lore.kernel.org/xen-devel/17c59d5c-795e-4591-a7c9-a4c5179bf...@arm.com/ > > Other empty declarations appear at callers of TYPE_SAFE and Linux module > macros like EXPORT_SYMBOL for which we need some sort of agreement. As said elsewhere, I think we should finally get rid of EXPORT_SYMBOL(). For TYPE_SAFE() I think some re-arrangement of the macros can address the Misra concern. Jan
Re: [PATCH v1 5/8] xen/riscv: introduce identity mapping
On Mon, 2023-06-12 at 16:24 +0200, Jan Beulich wrote: > On 12.06.2023 15:48, Jan Beulich wrote: > > On 06.06.2023 21:55, Oleksii Kurochko wrote: > > > -void __init noreturn noinline enable_mmu() > > > +/* > > > + * enable_mmu() can't be __init because __init section isn't > > > part of identity > > > + * mapping so it will cause an issue after MMU will be enabled. > > > + */ > > > > As hinted at above already - perhaps the identity mapping wants to > > be > > larger, up to covering the entire Xen image? Since it's temporary > > only anyway, you could even consider using a large page (and RWX > > permission). You already require no overlap of link and load > > addresses, > > so at least small page mappings ought to be possible for the entire > > image. > > To expand on that: Assume a future change on this path results in a > call > to memcpy() or memset() being introduced by the compiler (and then > let's > further assume this only occurs for a specific compiler version). > Right > now such a case would be noticed simply because we don't build those > library functions yet. But it'll likely be a perplexing crash once a > full > hypervisor can be built, the more that exception handlers also aren't > mapped. It makes sense but for some reason it doesn't crash ( I have a bunch of patches to run dom0 ) but as I mentioned in the previous e-mail I agree that probably it would be better to map the whole image using 1 Gb page table for example. > > > > - mmu_is_enabled: > > > /* > > > - * Stack should be re-inited as: > > > - * 1. Right now an address of the stack is relative to load > > > time > > > - * addresses what will cause an issue in case of load > > > start address > > > - * isn't equal to linker start address. > > > - * 2. Addresses in stack are all load time relative which > > > can be an > > > - * issue in case when load start address isn't equal to > > > linker > > > - * start address. 
> > > - * > > > - * We can't return to the caller because the stack was > > > reseted > > > - * and it may have stash some variable on the stack. > > > - * Jump to a brand new function as the stack was reseted > > > + * id_addrs should be in sync with id mapping in > > > + * setup_initial_pagetables() > > > > What is "id" meant to stand for here? Also if things need keeping > > in > > sync, then a similar comment should exist on the other side. > > I guess it's meant to stand for "identity mapping", but the common > use > of "id" makes we wonder if the variable wouldn't better be > ident_addrs[]. It would be better. but probably we will remove that variable if we agree to map the whole Xen instead of parts. So I'll wait for your response before starting to work on new patch series. Thanks a lot. ~ Oleksii
Re: [PATCH v1 5/8] xen/riscv: introduce identity mapping
On 14.06.2023 11:47, Oleksii wrote: > On Mon, 2023-06-12 at 15:48 +0200, Jan Beulich wrote: >> On 06.06.2023 21:55, Oleksii Kurochko wrote: >>> The way how switch to virtual address was implemented in the >>> commit e66003e7be ("xen/riscv: introduce setup_initial_pages") >>> wasn't safe enough so identity mapping was introduced and >>> used. >> >> I don't think this is sufficient as a description. You want to make >> clear what the "not safe enough" is, and you also want to go into >> more detail as to the solution chosen. I'm particularly puzzled that >> you map just two singular pages ... > I'll update a descrption. > > I map two pages as it likely to be enough to switch from 1:1 mapping I dislike your use of "likely" here: Unless this code is meant to be redone anyway, it should just work. Everywhere. > world. My point is that we need 1:1 mapping only for few instructions > which are located in start() [ in .text.header section ]: > > li t0, XEN_VIRT_START > la t1, start > sub t1, t1, t0 > > /* Calculate proper VA after jump from 1:1 mapping */ > la t0, .L_primary_switched > sub t0, t0, t1 > > /* Jump from 1:1 mapping world */ > jr t0 > > And it is needed to map 1:1 cpu0_boot_stack to not to fault when > executing epilog of enable_mmu() function as it accesses a value on the > stack: > c1b0: 6422ld s0,8(sp) > c1b2: 0141addisp,sp,16 > c1b4: 8082ret > >> >>> @@ -35,8 +40,10 @@ static unsigned long phys_offset; >>> * >>> * It might be needed one more page table in case when Xen load >>> address >>> * isn't 2 MB aligned. >>> + * >>> + * 3 additional page tables are needed for identity mapping. >>> */ >>> -#define PGTBL_INITIAL_COUNT ((CONFIG_PAGING_LEVELS - 1) + 1) >>> +#define PGTBL_INITIAL_COUNT ((CONFIG_PAGING_LEVELS - 1) + 1 + 3) >> >> What is this 3 coming from? It feels like the value should (again) >> somehow depend on CONFIG_PAGING_LEVELS. > 3 is too much in the current case. It should be 2 more. 
> > The linker address is 0xFFFC thereby mapping the linker to > load addresses > we need 3 pages ( with the condition that the size of Xen won't be > larger than 2 MB ) and 1 page if the case when Xen load address isn't > 2MV aligned. > > We need 2 more page tables because Xen is loaded to address 0x8020 > by OpenSBI and the load address isn't equal to the linker address so we > need additional 2 pages as the L2 table we already allocated when > mapping the linker to load addresses. > > And it looks like 2 is enough for Sv48, Sv57 as L4, L3 and L2 > pagetables will be already allocated during mapping the linker to load > addresses. I agree the root table will be there. But depending on load address, I don't think you can rely on any other tables to be re-usable from the Xen mappings you already have. So my gut feeling would be #define PGTBL_INITIAL_COUNT ((CONFIG_PAGING_LEVELS - 1) * 2 + 1) >>> @@ -232,22 +242,27 @@ void __init setup_initial_pagetables(void) >>> linker_start, >>> linker_end, >>> load_start); >>> + >>> + if ( linker_start == load_start ) >>> + return; >>> + >>> + setup_initial_mapping(&mmu_desc, >>> + load_start, >>> + load_start + PAGE_SIZE, >>> + load_start); >>> + >>> + setup_initial_mapping(&mmu_desc, >>> + (unsigned long)cpu0_boot_stack, >>> + (unsigned long)cpu0_boot_stack + >>> PAGE_SIZE, >> >> Shouldn't this be STACK_SIZE (and then also be prepared for >> STACK_SIZE > PAGE_SIZE)? > Yes, it will be better to use STACK_SIZE but for 1:1 mapping it will be > enough PAGE_SIZE as I mentioned above this only need for epilogue of > enable_mmu() function. But then it should still be the correct page of the stack that you map (the top one aiui). >>> -void __init noreturn noinline enable_mmu() >>> +/* >>> + * enable_mmu() can't be __init because __init section isn't part >>> of identity >>> + * mapping so it will cause an issue after MMU will be enabled. 
>>> + */ >> >> As hinted at above already - perhaps the identity mapping wants to be >> larger, up to covering the entire Xen image? Since it's temporary >> only anyway, you could even consider using a large page (and RWX >> permission). You already require no overlap of link and load >> addresses, >> so at least small page mappings ought to be possible for the entire >> image. > It makes sense and started to thought about that after I applied the > patch for Dom0 running... I think we can map 1 GB page which will cover > the whole Xen image. Also in that case we haven't to allocate 2 more > page tables. But you then need to be careful about not mapping space acc
Re: compat code lacks support for __attribute__
Wed, 14 Jun 2023 11:49:35 +0200 Jan Beulich : > So perhaps we could arrange for your additions to be excluded > from the compat translation machinery? Is there maybe a place for headers which are not public, but which are visible for tools/ and xen/? I think the trace functionality would be a candidate for such local shared headers. Olaf pgpX0celmkZaO.pgp Description: Digitale Signatur von OpenPGP
Re: compat code lacks support for __attribute__
On 14.06.2023 12:08, Olaf Hering wrote: > Wed, 14 Jun 2023 11:49:35 +0200 Jan Beulich : > >> So perhaps we could arrange for your additions to be excluded >> from the compat translation machinery? > > Is there maybe a place for headers which are not public, > but which are visible for tools/ and xen/? I think the trace > functionality would be a candidate for such local shared headers. I'm not sure I understand what you're after. The "public" here means the externally visible interface of the hypervisor. Aiui the header your thinking of still falls in this category. In how far checking and translation need to be done for any particular header under public/ is controlled by xen/include/Makefile. So if you were to introduce a new header for your specific purpose, all would depend on what, if anything, you add to that makefile. I don't think we currently have any header which is entirely unmentioned there, but with the right justification I don't see why this couldn't change (or perhaps such a header might simply be added to headers-n, so it's clear that it wasn't just an oversight that it's not listed anywhere [else]). Jan
[xen-unstable test] 181415: tolerable FAIL - PUSHED
flight 181415 xen-unstable real [real] flight 181420 xen-unstable real-retest [real] http://logs.test-lab.xenproject.org/osstest/logs/181415/ http://logs.test-lab.xenproject.org/osstest/logs/181420/ Failures :-/ but no regressions. Tests which are failing intermittently (not blocking): test-amd64-i386-livepatch 7 xen-install fail pass in 181420-retest test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 12 debian-hvm-install fail pass in 181420-retest test-amd64-amd64-xl-qcow222 guest-start.2 fail pass in 181420-retest test-amd64-amd64-xl-qemuu-win7-amd64 12 windows-install fail pass in 181420-retest Tests which did not succeed, but are not blocking: test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stop fail in 181420 like 181398 test-amd64-i386-xl-vhd 21 guest-start/debian.repeatfail like 181352 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 181385 test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 181385 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 181385 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 181398 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 181398 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 181398 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 181398 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 181398 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 181398 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 181398 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 181398 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-i386-xl-pvshim14 guest-start fail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt 15 migrate-support-checkfail never pass test-amd64-i386-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 
saverestore-support-checkfail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit2 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit2 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-credit1 15 migrate-support-checkfail never pass test-arm64-arm64-xl-credit1 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail never pass test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail never pass test-arm64-arm64-xl 15 migrate-support-checkfail never pass test-arm64-arm64-xl 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-i386-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-arm64-arm64-xl-vhd 14 migrate-support-checkfail never pass test-arm64-arm64-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd 15 saverestore-support-checkfail never pass test-armhf-armhf-xl-arndale 15 migrate-support-checkfail never pass test-armhf-armhf-xl-arndale 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass 
test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit1 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-credit2 15 migrate-support-checkfail never pass test-armhf-armhf-xl-credit2 16 saverestore-suppo
[qemu-mainline test] 181419: regressions - FAIL
flight 181419 qemu-mainline real [real] http://logs.test-lab.xenproject.org/osstest/logs/181419/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-amd64-xsm 6 xen-buildfail REGR. vs. 180691 build-arm64 6 xen-buildfail REGR. vs. 180691 build-arm64-xsm 6 xen-buildfail REGR. vs. 180691 build-amd64 6 xen-buildfail REGR. vs. 180691 build-i3866 xen-buildfail REGR. vs. 180691 build-i386-xsm6 xen-buildfail REGR. vs. 180691 build-armhf 6 xen-buildfail REGR. vs. 180691 Tests which did not succeed, but are not blocking: test-amd64-i386-xl-qemuu-debianhvm-amd64-shadow 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-debianhvm-i386-xsm 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-dmrestrict-amd64-dmrestrict 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-win7-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ws16-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-shadow 1 build-check(1) blocked n/a test-amd64-i386-xl-vhd1 build-check(1) blocked n/a test-amd64-i386-xl-xsm1 build-check(1) blocked n/a test-arm64-arm64-libvirt-raw 1 build-check(1) blocked n/a test-arm64-arm64-libvirt-xsm 1 build-check(1) blocked n/a test-arm64-arm64-xl 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit1 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit2 1 build-check(1) blocked n/a test-arm64-arm64-xl-thunderx 1 build-check(1) blocked n/a test-arm64-arm64-xl-vhd 1 build-check(1) blocked n/a test-arm64-arm64-xl-xsm 1 build-check(1) blocked n/a test-armhf-armhf-libvirt 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-qcow2 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-raw 1 build-check(1) blocked n/a test-armhf-armhf-xl 1 build-check(1) blocked n/a test-armhf-armhf-xl-arndale 1 build-check(1) blocked n/a test-armhf-armhf-xl-multivcpu 1 build-check(1) blocked n/a test-armhf-armhf-xl-rtds 1 build-check(1) blocked n/a 
test-armhf-armhf-xl-vhd 1 build-check(1) blocked n/a test-amd64-i386-qemuu-rhel6hvm-amd 1 build-check(1) blocked n/a test-amd64-i386-pair 1 build-check(1) blocked n/a test-amd64-i386-libvirt-xsm 1 build-check(1) blocked n/a test-amd64-i386-libvirt-raw 1 build-check(1) blocked n/a test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 1 build-check(1) blocked n/a test-amd64-i386-libvirt-pair 1 build-check(1) blocked n/a test-amd64-i386-libvirt 1 build-check(1) blocked n/a test-amd64-i386-freebsd10-i386 1 build-check(1) blocked n/a test-amd64-i386-freebsd10-amd64 1 build-check(1) blocked n/a test-amd64-coresched-i386-xl 1 build-check(1) blocked n/a test-amd64-coresched-amd64-xl 1 build-check(1) blocked n/a test-amd64-amd64-xl-xsm 1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-shadow1 build-check(1) blocked n/a test-amd64-amd64-xl-rtds 1 build-check(1) blocked n/a build-arm64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ws16-amd64 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-win7-amd64 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-armhf-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 1 build-check(1) blocked n/a build-i386-libvirt1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 1 build-check(1) blocked n/a test-amd64-amd64-dom0pvh-xl-amd 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 1 build-check(1) blocked n/a test-amd64-amd64-dom0pvh-xl-intel 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-amd64 1 build-check(1)blocked n/a test-amd64-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-libvirt-pair 1 build-check(1) blocked n/a test-amd64-amd64-libvirt-qemuu-de
[linux-linus test] 181417: regressions - FAIL
flight 181417 linux-linus real [real] http://logs.test-lab.xenproject.org/osstest/logs/181417/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: test-armhf-armhf-xl-credit1 8 xen-boot fail REGR. vs. 180278 build-arm64-pvops 6 kernel-build fail REGR. vs. 180278 Tests which did not succeed, but are not blocking: test-arm64-arm64-xl-credit1 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit2 1 build-check(1) blocked n/a test-arm64-arm64-xl-thunderx 1 build-check(1) blocked n/a test-arm64-arm64-xl-vhd 1 build-check(1) blocked n/a test-arm64-arm64-xl-xsm 1 build-check(1) blocked n/a test-arm64-arm64-examine 1 build-check(1) blocked n/a test-arm64-arm64-libvirt-raw 1 build-check(1) blocked n/a test-arm64-arm64-libvirt-xsm 1 build-check(1) blocked n/a test-arm64-arm64-xl 1 build-check(1) blocked n/a test-armhf-armhf-xl-multivcpu 8 xen-boot fail like 180278 test-armhf-armhf-xl-credit2 8 xen-boot fail like 180278 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 180278 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 180278 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 180278 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 180278 test-armhf-armhf-libvirt 8 xen-boot fail like 180278 test-armhf-armhf-xl-arndale 8 xen-boot fail like 180278 test-armhf-armhf-examine 8 reboot fail like 180278 test-armhf-armhf-libvirt-raw 8 xen-boot fail like 180278 test-armhf-armhf-libvirt-qcow2 8 xen-bootfail like 180278 test-armhf-armhf-xl-vhd 8 xen-boot fail like 180278 test-armhf-armhf-xl 8 xen-boot fail like 180278 test-armhf-armhf-xl-rtds 8 xen-boot fail like 180278 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 180278 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass 
test-amd64-amd64-libvirt-raw 14 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qcow2 14 migrate-support-checkfail never pass version targeted for testing: linuxb6dad5178ceaf23f369c3711062ce1f2afc33644 baseline version: linux6c538e1adbfc696ac4747fb10d63e704344f763d Last test of basis 180278 2023-04-16 19:41:46 Z 58 days Failing since180281 2023-04-17 06:24:36 Z 58 days 111 attempts Testing same since 181417 2023-06-14 04:18:54 Z0 days1 attempts 2688 people touched revisions under test, not listing them all jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-arm64-libvirt pass build-armhf-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-arm64-pvopsfail build-armhf-pvopspass build-i386-pvops pass test-amd64-amd64-xl pass test-amd64-coresched-amd64-xlpass test-arm64-arm64-xl blocked test-armhf-armhf-xl fail test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm pass test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsmpass test-amd64-amd64-xl-qemut-debianhvm-i386-xsm pass test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm pass test-amd64-amd64-libvirt-xsm pass test-arm64-arm64-libvirt-xsm blocked test
Re: [PATCH v1 5/8] xen/riscv: introduce identity mapping
> > +} > > + > > +void __init remove_identity_mapping(void) > > +{ > > + int i, j; > > Nit: unsigned int please. > > It should be int in the current case because of the 'for' exit condition: for ( pgtbl = stage1_pgtbl_root, i = CONFIG_PAGING_LEVELS - 1; i >= 0; i-- ) Should the exit condition be rewritten? ~ Oleksii
Re: [PATCH v1 5/8] xen/riscv: introduce identity mapping
On 14.06.2023 13:06, Oleksii wrote: >>> +} >>> + >>> +void __init remove_identity_mapping(void) >>> +{ >>> + int i, j; >> >> Nit: unsigned int please. >> >> > It should be int in the current case because of the 'for' exit > condition: > for ( pgtbl = stage1_pgtbl_root, i = CONFIG_PAGING_LEVELS - 1; i >> = 0; i-- ) > > Should exit condition be re-writen? Since it easily can be, I think that would be preferable. But in a case like this, if you think it's better the way you have it, so be it (until someone comes along and changes it). Jan
Re: [PATCH v1 3/8] xen/riscv: introduce reset_stack() function
On Mon, 2023-06-12 at 09:12 +0200, Jan Beulich wrote: > On 06.06.2023 21:55, Oleksii Kurochko wrote: > > Signed-off-by: Oleksii Kurochko > > This wants addressing the "Why?" aspect in the description. Is the > present > code wrong in some perhaps subtle way? Are you meaning to re-use the > code? > If so, in which way (which is relevant to determine whether the new > function may actually continue to live in .text.header)? > As I mentioned in a previous e-mail, there is no such requirement for the reset_stack() function to live in .text.header. I think such a requirement exists only for the _start() function, as we would like to see it at the start of the image: a bootloader jumps to it and it is expected to be the first instructions. Even though I don't see any issue for reset_stack() to live in the .text.header section, should it be moved to the .text section? If yes, I would appreciate hearing why it would be better. ~ Oleksii
Re: [PATCH v1 3/8] xen/riscv: introduce reset_stack() function
On 14.06.2023 14:19, Oleksii wrote: > On Mon, 2023-06-12 at 09:12 +0200, Jan Beulich wrote: >> On 06.06.2023 21:55, Oleksii Kurochko wrote: >>> Signed-off-by: Oleksii Kurochko >> >> This wants addressing the "Why?" aspect in the description. Is the >> present >> code wrong in some perhaps subtle way? Are you meaning to re-use the >> code? >> If so, in which way (which is relevant to determine whether the new >> function may actually continue to live in .text.header)? >> > As I mentioned in previous e-mail there is no such requirement for > reset_stack() function to live in .text.header. > > I think such requirement exists only for _start() function as we would > like to see it at the start of image as a bootloader jumps to it and it > is expected to be the first instructions. > > Even though I don't see any issue for reset_stack() to live in > .text.header section, should it be moved to .text section? If yes, I > would appreciate hearing why it would be better. Because of the simple principle of special sections better only holding what really needs to be there. Jan
Re: [PATCH] iommu/amd-vi: adjust _amd_iommu_flush_pages() to handle pseudo-domids
On 14.06.2023 10:32, Roger Pau Monne wrote: > When the passed domain is DomIO iterate over the list of DomIO > assigned devices and flush each pseudo-domid found. > > invalidate_all_domain_pages() does call amd_iommu_flush_all_pages() > with DomIO as a parameter, Does it? Since the full function is visible in the patch (because of the "While there ..." change), it seems pretty clear that it doesn't: for_each_domain() iterates over ordinary domains only. > and hence the underlying > _amd_iommu_flush_pages() implementation must be capable of flushing > all pseudo-domids used by the quarantine domain logic. While it didn't occur to me right away when we discussed this, it may well be that I left alone all flushing when introducing the pseudo domain IDs simply because no flushing would ever happen for the quarantine domain. > While there fix invalidate_all_domain_pages() to only attempt to flush > the domains that have IOMMU enabled, otherwise the flush is pointless. For the moment at least it looks to me as if this change alone wants to go in. Jan
Re: [PATCH v4 01/34] mm: Add PAGE_TYPE_OP folio functions
On Mon, Jun 12, 2023 at 02:03:50PM -0700, Vishal Moola (Oracle) wrote: > No folio equivalents for page type operations have been defined, so > define them for later folio conversions. > > Also changes the Page##uname macros to take in const struct page* since > we only read the memory here. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/page-flags.h | 20 ++-- > 1 file changed, 18 insertions(+), 2 deletions(-) > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > index 92a2063a0a23..e99a616b9bcd 100644 > --- a/include/linux/page-flags.h > +++ b/include/linux/page-flags.h > @@ -908,6 +908,8 @@ static inline bool is_page_hwpoison(struct page *page) > > #define PageType(page, flag) \ > ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) > +#define folio_test_type(folio, flag) \ > + ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) > > static inline int page_type_has_type(unsigned int page_type) > { > @@ -920,20 +922,34 @@ static inline int page_has_type(struct page *page) > } > > #define PAGE_TYPE_OPS(uname, lname) \ > -static __always_inline int Page##uname(struct page *page)\ > +static __always_inline int Page##uname(const struct page *page) > \ > {\ > return PageType(page, PG_##lname); \ > }\ > +static __always_inline int folio_test_##lname(const struct folio *folio)\ > +{\ > + return folio_test_type(folio, PG_##lname); \ > +}\ > static __always_inline void __SetPage##uname(struct page *page) > \ > {\ > VM_BUG_ON_PAGE(!PageType(page, 0), page); \ > page->page_type &= ~PG_##lname; \ > }\ > +static __always_inline void __folio_set_##lname(struct folio *folio) \ > +{\ > + VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ > + folio->page.page_type &= ~PG_##lname; \ > +}\ > static __always_inline void __ClearPage##uname(struct page *page)\ > {\ > VM_BUG_ON_PAGE(!Page##uname(page), page); \ > page->page_type |= PG_##lname; \ > -} > +}\ > +static __always_inline void 
__folio_clear_##lname(struct folio *folio) > \ > +{\ > + VM_BUG_ON_FOLIO(!folio_test_##lname(folio), folio); \ > + folio->page.page_type |= PG_##lname;\ > +}\ > > /* > * PageBuddy() indicates that the page is free and in the buddy system > -- > 2.40.1 > > -- Sincerely yours, Mike.
Functions _spin_lock_cb() and handle_ro_raz()
Hello everyone, I am working on the violations of MISRA C:2012 Rule 8.10, whose headline says: "An inline function shall be declared with the static storage class". For both ARM64 and X86_64 builds, function _spin_lock_cb() defined in spinlock.c violates the rule. Such function is declared in spinlock.h without the inline function specifier: are there any reasons to do this? What about solving the violation by moving the function definition in spinlock.h and declaring it as static inline? The same happens also for the function handle_ro_raz() in the ARM64 build, declared in traps.h and defined in traps.c. Regards, Federico Serafini
Re: [PATCH v4 02/34] s390: Use _pt_s390_gaddr for gmap address tracking
On Mon, Jun 12, 2023 at 02:03:51PM -0700, Vishal Moola (Oracle) wrote: > s390 uses page->index to keep track of page tables for the guest address > space. In an attempt to consolidate the usage of page fields in s390, > replace _pt_pad_2 with _pt_s390_gaddr to replace page->index in gmap. > > This will help with the splitting of struct ptdesc from struct page, as > well as allow s390 to use _pt_frag_refcount for fragmented page table > tracking. > > Since page->_pt_s390_gaddr aliases with mapping, ensure its set to NULL > before freeing the pages as well. I'm looking at the final result and unless I've missed something, setting of _pt_s390_gaddr to 0 is always followed by pagetable_free(). Can't we have pagetable_free() take care of zeroing _pt_s390_gaddr? I think patch 16 ("s390: Convert various gmap functions to use ptdescs") would be the right place for that. Otherwise: Acked-by: Mike Rapoport (IBM) > This also reverts commit 7e25de77bc5ea ("s390/mm: use pmd_pgtable_page() > helper in __gmap_segment_gaddr()") which had s390 use > pmd_pgtable_page() to get a gmap page table, as pmd_pgtable_page() > should be used for more generic process page tables. > > Signed-off-by: Vishal Moola (Oracle) > --- > arch/s390/mm/gmap.c | 56 +++- > include/linux/mm_types.h | 2 +- > 2 files changed, 39 insertions(+), 19 deletions(-) > > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index dc90d1eb0d55..81c683426b49 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -70,7 +70,7 @@ static struct gmap *gmap_alloc(unsigned long limit) > page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); > if (!page) > goto out_free; > - page->index = 0; > + page->_pt_s390_gaddr = 0; > list_add(&page->lru, &gmap->crst_list); > table = page_to_virt(page); > crst_table_init(table, etype); > @@ -187,16 +187,20 @@ static void gmap_free(struct gmap *gmap) > if (!(gmap_is_shadow(gmap) && gmap->removed)) > gmap_flush_tlb(gmap); > /* Free all segment & region tables. 
*/ > - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) > + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { > + page->_pt_s390_gaddr = 0; > __free_pages(page, CRST_ALLOC_ORDER); > + } > gmap_radix_tree_free(&gmap->guest_to_host); > gmap_radix_tree_free(&gmap->host_to_guest); > > /* Free additional data for a shadow gmap */ > if (gmap_is_shadow(gmap)) { > /* Free all page tables. */ > - list_for_each_entry_safe(page, next, &gmap->pt_list, lru) > + list_for_each_entry_safe(page, next, &gmap->pt_list, lru) { > + page->_pt_s390_gaddr = 0; > page_table_free_pgste(page); > + } > gmap_rmap_radix_tree_free(&gmap->host_to_rmap); > /* Release reference to the parent */ > gmap_put(gmap->parent); > @@ -318,12 +322,14 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned > long *table, > list_add(&page->lru, &gmap->crst_list); > *table = __pa(new) | _REGION_ENTRY_LENGTH | > (*table & _REGION_ENTRY_TYPE_MASK); > - page->index = gaddr; > + page->_pt_s390_gaddr = gaddr; > page = NULL; > } > spin_unlock(&gmap->guest_table_lock); > - if (page) > + if (page) { > + page->_pt_s390_gaddr = 0; > __free_pages(page, CRST_ALLOC_ORDER); > + } > return 0; > } > > @@ -336,12 +342,14 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned > long *table, > static unsigned long __gmap_segment_gaddr(unsigned long *entry) > { > struct page *page; > - unsigned long offset; > + unsigned long offset, mask; > > offset = (unsigned long) entry / sizeof(unsigned long); > offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; > - page = pmd_pgtable_page((pmd_t *) entry); > - return page->index + offset; > + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); > + page = virt_to_page((void *)((unsigned long) entry & mask)); > + > + return page->_pt_s390_gaddr + offset; > } > > /** > @@ -1351,6 +1359,7 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned > long raddr) > /* Free page table */ > page = phys_to_page(pgt); > list_del(&page->lru); > + page->_pt_s390_gaddr = 0; > 
page_table_free_pgste(page); > } > > @@ -1379,6 +1388,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, > unsigned long raddr, > /* Free page table */ > page = phys_to_page(pgt); > list_del(&page->lru); > + page->_pt_s390_gaddr = 0; > page_table_free_pgste(page); > } > } > @@ -1409,6 +1419,7 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned > long raddr) >
Re: [XEN PATCH] xen: fixed violations of MISRA C:2012 Rule 3.1
On 13/06/23 11:44, Julien Grall wrote: Hi, On 13/06/2023 09:27, Jan Beulich wrote: On 13.06.2023 09:42, Nicola Vetrini wrote: The xen sources contain several violations of Rule 3.1 from MISRA C:2012, whose headline states: "The character sequences '/*' and '//' shall not be used within a comment". Most of the violations are due to the presence of links to webpages within C-style comment blocks, such as: xen/arch/arm/include/asm/smccc.h:37.1-41.3 /* * This file provides common defines for ARM SMC Calling Convention as * specified in * http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html */ In this case, we propose to deviate all of these occurrences with a project deviation to be captured by a tool configuration. There are, however, a few other violations that do not fall under this category, namely: 1. in file "xen/arch/arm/include/asm/arm64/flushtlb.h" we propose to avoid the usage of a nested comment; 2. in file "xen/common/xmalloc_tlsf.c" we propose to substitute the commented-out if statement with a "#if 0 .. #endif; 3. in file "xen/include/xen/atomic.h" and "xen/drivers/passthrough/arm/smmu-v3.c" we propose to split the C-style comment containing the nested comment into two doxygen comments, clearly identifying the second as a code sample. This can then be captured with a project deviation by a tool configuration. Signed-off-by: Nicola Vetrini --- Changes: - Resending the patch with the right maintainers in CC. But without otherwise addressing comments already given, afaics. One more remark: --- a/xen/common/xmalloc_tlsf.c +++ b/xen/common/xmalloc_tlsf.c @@ -140,9 +140,10 @@ static inline void MAPPING_SEARCH(unsigned long *r, int *fl, int *sl) *fl = flsl(*r) - 1; *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; *fl -= FLI_OFFSET; - /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! - *fl = *sl = 0; - */ +#if 0 + if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! 
+ fl = *sl = 0; You want to get indentation right here, and you don't want to lose the indirection on fl. +#endif *r &= ~t; } } If you split this to 4 patches, leaving the URL proposal in just the cover letter, then I think this one change (with the adjustments) could go in right away. Similarly I expect the arm64/flushtlb.h change could be ack-ed right away by an Arm maintainer. I actually dislike the proposal. In this case, the code is meant to look like assembly code. I would replace the // with ;. Also, I would like to keep the comment style in sync in arm32/flushtlb.h. So can this be updated as well? Cheers, Hi, Julien. I'm not authorized to send patches about files in the arm32 tree, but surely the patch can be easily replicated in any place where it makes sense for consistency. Regards, Nicola
Re: [PATCH v4 03/34] s390: Use pt_frag_refcount for pagetables
On Mon, Jun 12, 2023 at 02:03:52PM -0700, Vishal Moola (Oracle) wrote: > s390 currently uses _refcount to identify fragmented page tables. > The page table struct already has a member pt_frag_refcount used by > powerpc, so have s390 use that instead of the _refcount field as well. > This improves the safety for _refcount and the page table tracking. > > This also allows us to simplify the tracking since we can once again use > the lower byte of pt_frag_refcount instead of the upper byte of _refcount. > > Signed-off-by: Vishal Moola (Oracle) One nit below, otherwise Acked-by: Mike Rapoport (IBM) > --- > arch/s390/mm/pgalloc.c | 38 +++--- > 1 file changed, 15 insertions(+), 23 deletions(-) > > diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c > index 66ab68db9842..6b99932abc66 100644 > --- a/arch/s390/mm/pgalloc.c > +++ b/arch/s390/mm/pgalloc.c > @@ -182,20 +182,17 @@ void page_table_free_pgste(struct page *page) > * As follows from the above, no unallocated or fully allocated parent > * pages are contained in mm_context_t::pgtable_list. > * > - * The upper byte (bits 24-31) of the parent page _refcount is used > + * The lower byte (bits 0-7) of the parent page pt_frag_refcount is used > * for tracking contained 2KB-pgtables and has the following format: > * > * PP AA > - * 01234567upper byte (bits 24-31) of struct page::_refcount > + * 01234567upper byte (bits 0-7) of struct page::pt_frag_refcount Nit: lower > * || || > * || |+--- upper 2KB-pgtable is allocated > * || + lower 2KB-pgtable is allocated > * |+--- upper 2KB-pgtable is pending for removal > * + lower 2KB-pgtable is pending for removal > * > - * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why > - * using _refcount is possible). > - * > * When 2KB-pgtable is allocated the corresponding AA bit is set to 1. 
> * The parent page is either: > * - added to mm_context_t::pgtable_list in case the second half of the > @@ -243,11 +240,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm) > if (!list_empty(&mm->context.pgtable_list)) { > page = list_first_entry(&mm->context.pgtable_list, > struct page, lru); > - mask = atomic_read(&page->_refcount) >> 24; > + mask = atomic_read(&page->pt_frag_refcount); > /* >* The pending removal bits must also be checked. >* Failure to do so might lead to an impossible > - * value of (i.e 0x13 or 0x23) written to _refcount. > + * value of (i.e 0x13 or 0x23) written to > + * pt_frag_refcount. >* Such values violate the assumption that pending and >* allocation bits are mutually exclusive, and the rest >* of the code unrails as result. That could lead to > @@ -259,8 +257,8 @@ unsigned long *page_table_alloc(struct mm_struct *mm) > bit = mask & 1; /* =1 -> second 2K */ > if (bit) > table += PTRS_PER_PTE; > - atomic_xor_bits(&page->_refcount, > - 0x01U << (bit + 24)); > + atomic_xor_bits(&page->pt_frag_refcount, > + 0x01U << bit); > list_del(&page->lru); > } > } > @@ -281,12 +279,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm) > table = (unsigned long *) page_to_virt(page); > if (mm_alloc_pgste(mm)) { > /* Return 4K page table with PGSTEs */ > - atomic_xor_bits(&page->_refcount, 0x03U << 24); > + atomic_xor_bits(&page->pt_frag_refcount, 0x03U); > memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); > memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); > } else { > /* Return the first 2K fragment of the page */ > - atomic_xor_bits(&page->_refcount, 0x01U << 24); > + atomic_xor_bits(&page->pt_frag_refcount, 0x01U); > memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); > spin_lock_bh(&mm->context.lock); > list_add(&page->lru, &mm->context.pgtable_list); > @@ -323,22 +321,19 @@ void page_table_free(struct mm_struct *mm, unsigned > long *table) >* will happen outside of the critical section from this >* function or from 
__tlb_remove_table() >*/ > - mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); > - mask >>= 24; > + mask = atomic_xor_bits(&pa
Re: [PATCH] iommu/amd-vi: adjust _amd_iommu_flush_pages() to handle pseudo-domids
On Wed, Jun 14, 2023 at 02:58:14PM +0200, Jan Beulich wrote: > On 14.06.2023 10:32, Roger Pau Monne wrote: > > When the passed domain is DomIO iterate over the list of DomIO > > assigned devices and flush each pseudo-domid found. > > > > invalidate_all_domain_pages() does call amd_iommu_flush_all_pages() > > with DomIO as a parameter, > > Does it? Since the full function is visible in the patch (because of > the "While there ..." change), it seems pretty clear that it doesn't: > for_each_domain() iterates over ordinary domains only. Oh, I got confused by domain_create() returning early for system domains. > > and hence the underlying > > _amd_iommu_flush_pages() implementation must be capable of flushing > > all pseudo-domids used by the quarantine domain logic. > > While it didn't occur to me right away when we discussed this, it > may well be that I left alone all flushing when introducing the pseudo > domain IDs simply because no flushing would ever happen for the > quarantine domain. But the purpose of the calls to invalidate_all_devices() and invalidate_all_domain_pages() in amd_iommu_resume() is to cover up for the lack of Invalidate All support in the IOMMU, so flushing pseudo-domids is also required in order to flush all possible IOMMU state. Note that as part of invalidate_all_devices() we do invalidate DTEs for devices assigned to pseudo-domids, hence it seems natural that we also flush such pseudo-domids. > > While there fix invalidate_all_domain_pages() to only attempt to flush > > the domains that have IOMMU enabled, otherwise the flush is pointless. > > For the moment at least it looks to me as if this change alone wants > to go in. I would rather get the current patch with an added call to flush dom_io in invalidate_all_domain_pages(). Thanks, Roger.
Re: [PATCH v4 04/34] pgtable: Create struct ptdesc
On Mon, Jun 12, 2023 at 02:03:53PM -0700, Vishal Moola (Oracle) wrote: > Currently, page table information is stored within struct page. As part > of simplifying struct page, create struct ptdesc for page table > information. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/pgtable.h | 51 + > 1 file changed, 51 insertions(+) > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index c5a51481bbb9..330de96ebfd6 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -975,6 +975,57 @@ static inline void ptep_modify_prot_commit(struct > vm_area_struct *vma, > #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ > #endif /* CONFIG_MMU */ > > + > +/** > + * struct ptdesc - Memory descriptor for page tables. > + * @__page_flags: Same as page flags. Unused for page tables. > + * @pt_list: List of used page tables. Used for s390 and x86. > + * @_pt_pad_1: Padding that aliases with page's compound head. > + * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. > + * @_pt_s390_gaddr: Aliases with page's mapping. Used for s390 gmap only. > + * @pt_mm: Used for x86 pgds. > + * @pt_frag_refcount: For fragmented page table tracking. Powerpc and s390 > only. > + * @ptl: Lock for the page table. Do you mind aligning the descriptions by @pt_frag_refcount? I think it'll be more readable. > + * > + * This struct overlays struct page for now. Do not modify without a good > + * understanding of the issues. 
> + */ > +struct ptdesc { > + unsigned long __page_flags; > + > + union { > + struct list_head pt_list; > + struct { > + unsigned long _pt_pad_1; > + pgtable_t pmd_huge_pte; > + }; > + }; > + unsigned long _pt_s390_gaddr; > + > + union { > + struct mm_struct *pt_mm; > + atomic_t pt_frag_refcount; > + }; > + > +#if ALLOC_SPLIT_PTLOCKS > + spinlock_t *ptl; > +#else > + spinlock_t ptl; > +#endif > +}; > + > +#define TABLE_MATCH(pg, pt) \ > + static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) > +TABLE_MATCH(flags, __page_flags); > +TABLE_MATCH(compound_head, pt_list); > +TABLE_MATCH(compound_head, _pt_pad_1); > +TABLE_MATCH(pmd_huge_pte, pmd_huge_pte); > +TABLE_MATCH(mapping, _pt_s390_gaddr); > +TABLE_MATCH(pt_mm, pt_mm); > +TABLE_MATCH(ptl, ptl); > +#undef TABLE_MATCH > +static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); > + > /* > * No-op macros that just return the current protection value. Defined here > * because these macros can be used even if CONFIG_MMU is not defined. > -- > 2.40.1 > > -- Sincerely yours, Mike.
[qemu-mainline test] 181424: regressions - FAIL
flight 181424 qemu-mainline real [real] http://logs.test-lab.xenproject.org/osstest/logs/181424/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-amd64-xsm 6 xen-buildfail REGR. vs. 180691 build-arm64 6 xen-buildfail REGR. vs. 180691 build-arm64-xsm 6 xen-buildfail REGR. vs. 180691 build-amd64 6 xen-buildfail REGR. vs. 180691 build-i3866 xen-buildfail REGR. vs. 180691 build-i386-xsm6 xen-buildfail REGR. vs. 180691 build-armhf 6 xen-buildfail REGR. vs. 180691 Tests which did not succeed, but are not blocking: test-amd64-i386-xl-qemuu-debianhvm-amd64-shadow 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-debianhvm-i386-xsm 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-dmrestrict-amd64-dmrestrict 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-win7-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ws16-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-shadow 1 build-check(1) blocked n/a test-amd64-i386-xl-vhd1 build-check(1) blocked n/a test-amd64-i386-xl-xsm1 build-check(1) blocked n/a test-arm64-arm64-libvirt-raw 1 build-check(1) blocked n/a test-arm64-arm64-libvirt-xsm 1 build-check(1) blocked n/a test-arm64-arm64-xl 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit1 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit2 1 build-check(1) blocked n/a test-arm64-arm64-xl-thunderx 1 build-check(1) blocked n/a test-arm64-arm64-xl-vhd 1 build-check(1) blocked n/a test-arm64-arm64-xl-xsm 1 build-check(1) blocked n/a test-armhf-armhf-libvirt 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-qcow2 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-raw 1 build-check(1) blocked n/a test-armhf-armhf-xl 1 build-check(1) blocked n/a test-armhf-armhf-xl-arndale 1 build-check(1) blocked n/a test-armhf-armhf-xl-multivcpu 1 build-check(1) blocked n/a test-armhf-armhf-xl-rtds 1 build-check(1) blocked n/a 
test-armhf-armhf-xl-vhd 1 build-check(1) blocked n/a test-amd64-i386-qemuu-rhel6hvm-amd 1 build-check(1) blocked n/a test-amd64-i386-pair 1 build-check(1) blocked n/a test-amd64-i386-libvirt-xsm 1 build-check(1) blocked n/a test-amd64-i386-libvirt-raw 1 build-check(1) blocked n/a test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 1 build-check(1) blocked n/a test-amd64-i386-libvirt-pair 1 build-check(1) blocked n/a test-amd64-i386-libvirt 1 build-check(1) blocked n/a test-amd64-i386-freebsd10-i386 1 build-check(1) blocked n/a test-amd64-i386-freebsd10-amd64 1 build-check(1) blocked n/a test-amd64-coresched-i386-xl 1 build-check(1) blocked n/a test-amd64-coresched-amd64-xl 1 build-check(1) blocked n/a test-amd64-amd64-xl-xsm 1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-shadow1 build-check(1) blocked n/a test-amd64-amd64-xl-rtds 1 build-check(1) blocked n/a build-arm64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ws16-amd64 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-win7-amd64 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-armhf-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 1 build-check(1) blocked n/a build-i386-libvirt1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 1 build-check(1) blocked n/a test-amd64-amd64-dom0pvh-xl-amd 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 1 build-check(1) blocked n/a test-amd64-amd64-dom0pvh-xl-intel 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-amd64 1 build-check(1)blocked n/a test-amd64-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-libvirt-pair 1 build-check(1) blocked n/a test-amd64-amd64-libvirt-qemuu-de
Re: [PATCH v4 05/34] mm: add utility functions for ptdesc
On Mon, Jun 12, 2023 at 02:03:54PM -0700, Vishal Moola (Oracle) wrote: > Introduce utility functions setting the foundation for ptdescs. These > will also assist in the splitting out of ptdesc from struct page. > > Functions that focus on the descriptor are prefixed with ptdesc_* while > functions that focus on the pagetable are prefixed with pagetable_*. > > pagetable_alloc() is defined to allocate new ptdesc pages as compound > pages. This is to standardize ptdescs by allowing for one allocation > and one free function, in contrast to 2 allocation and 2 free functions. > > Signed-off-by: Vishal Moola (Oracle) > --- > include/asm-generic/tlb.h | 11 +++ > include/linux/mm.h| 61 +++ > include/linux/pgtable.h | 12 > 3 files changed, 84 insertions(+) > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index b46617207c93..6bade9e0e799 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -481,6 +481,17 @@ static inline void tlb_remove_page(struct mmu_gather > *tlb, struct page *page) > return tlb_remove_page_size(tlb, page, PAGE_SIZE); > } > > +static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) > +{ > + tlb_remove_table(tlb, pt); > +} > + > +/* Like tlb_remove_ptdesc, but for page-like page directories. 
*/ > +static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct > ptdesc *pt) > +{ > + tlb_remove_page(tlb, ptdesc_page(pt)); > +} > + > static inline void tlb_change_page_size(struct mmu_gather *tlb, >unsigned int page_size) > { > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 0db09639dd2d..f184f1eba85d 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2766,6 +2766,62 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, > pud_t *pud, unsigned long a > } > #endif /* CONFIG_MMU */ > > +static inline struct ptdesc *virt_to_ptdesc(const void *x) > +{ > + return page_ptdesc(virt_to_page(x)); > +} > + > +static inline void *ptdesc_to_virt(const struct ptdesc *pt) > +{ > + return page_to_virt(ptdesc_page(pt)); > +} > + > +static inline void *ptdesc_address(const struct ptdesc *pt) > +{ > + return folio_address(ptdesc_folio(pt)); > +} > + > +static inline bool pagetable_is_reserved(struct ptdesc *pt) > +{ > + return folio_test_reserved(ptdesc_folio(pt)); > +} > + > +/** > + * pagetable_alloc - Allocate pagetables > + * @gfp:GFP flags > + * @order: desired pagetable order > + * > + * pagetable_alloc allocates a page table descriptor as well as all pages > + * described by it. I think the order should be switched here to emphasize that primarily this method allocates memory for page tables. How about pagetable_alloc allocates memory for the page tables as well as a page table descriptor that describes the allocated memory > + * > + * Return: The ptdesc describing the allocated page tables. > + */ > +static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) > +{ > + struct page *page = alloc_pages(gfp | __GFP_COMP, order); > + > + return page_ptdesc(page); > +} > + > +/** > + * pagetable_free - Free pagetables > + * @pt: The page table descriptor > + * > + * pagetable_free frees a page table descriptor as well as all page > + * tables described by said ptdesc. Similarly here. 
> + */ > +static inline void pagetable_free(struct ptdesc *pt) > +{ > + struct page *page = ptdesc_page(pt); > + > + __free_pages(page, compound_order(page)); > +} > + > +static inline void pagetable_clear(void *x) > +{ > + clear_page(x); > +} > + > #if USE_SPLIT_PTE_PTLOCKS > #if ALLOC_SPLIT_PTLOCKS > void __init ptlock_cache_init(void); > @@ -2992,6 +3048,11 @@ static inline void mark_page_reserved(struct page > *page) > adjust_managed_page_count(page, -1); > } > > +static inline void free_reserved_ptdesc(struct ptdesc *pt) > +{ > + free_reserved_page(ptdesc_page(pt)); > +} > + > /* > * Default method to free all the __init memory into the buddy system. > * The freed pages will be poisoned with pattern "poison" if it's within > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 330de96ebfd6..c405f74d3875 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -1026,6 +1026,18 @@ TABLE_MATCH(ptl, ptl); > #undef TABLE_MATCH > static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); > > +#define ptdesc_page(pt) (_Generic((pt), > \ > + const struct ptdesc *: (const struct page *)(pt), \ > + struct ptdesc *:(struct page *)(pt))) > + > +#define ptdesc_folio(pt) (_Generic((pt), \ > + const struct ptdesc *: (const struct folio *)(pt), \ > + struct ptdesc *:(struct folio *)(pt))) > + > +#define page_ptdesc(p)
Re: [PATCH v4 06/34] mm: Convert pmd_pgtable_page() to pmd_ptdesc()
On Mon, Jun 12, 2023 at 02:03:55PM -0700, Vishal Moola (Oracle) wrote: > Converts pmd_pgtable_page() to pmd_ptdesc() and all its callers. This > removes some direct accesses to struct page, working towards splitting > out struct ptdesc from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm.h | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index f184f1eba85d..088b7664f897 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2931,15 +2931,15 @@ static inline void pgtable_pte_page_dtor(struct page > *page) > > #if USE_SPLIT_PMD_PTLOCKS > > -static inline struct page *pmd_pgtable_page(pmd_t *pmd) > +static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) > { > unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); > - return virt_to_page((void *)((unsigned long) pmd & mask)); > + return virt_to_ptdesc((void *)((unsigned long) pmd & mask)); > } > > static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) > { > - return ptlock_ptr(pmd_pgtable_page(pmd)); > + return ptlock_ptr(ptdesc_page(pmd_ptdesc(pmd))); > } > > static inline bool pmd_ptlock_init(struct page *page) > @@ -2958,7 +2958,7 @@ static inline void pmd_ptlock_free(struct page *page) > ptlock_free(page); > } > > -#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte) > +#define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) > > #else > > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH] iommu/amd-vi: adjust _amd_iommu_flush_pages() to handle pseudo-domids
On 14.06.2023 15:23, Roger Pau Monné wrote: > On Wed, Jun 14, 2023 at 02:58:14PM +0200, Jan Beulich wrote: >> On 14.06.2023 10:32, Roger Pau Monne wrote: >>> When the passed domain is DomIO iterate over the list of DomIO >>> assigned devices and flush each pseudo-domid found. >>> >>> invalidate_all_domain_pages() does call amd_iommu_flush_all_pages() >>> with DomIO as a parameter, >> >> Does it? Since the full function is visible in the patch (because of >> the "While there ..." change), it seems pretty clear that it doesn't: >> for_each_domain() iterates over ordinary domains only. > > Oh, I got confused by domain_create() returning early for system > domains. > >>> and hence the underlying >>> _amd_iommu_flush_pages() implementation must be capable of flushing >>> all pseudo-domids used by the quarantine domain logic. >> >> While it didn't occur to me right away when we discussed this, it >> may well be that I left alone all flushing when introducing the pseudo >> domain IDs simply because no flushing would ever happen for the >> quarantine domain. > > But the purpose of the calls to invalidate_all_devices() and > invalidate_all_domain_pages() in amd_iommu_resume() is to cover up for > the lack of Invalidate All support in the IOMMU, so flushing > pseudo-domids is also required in order to flush all possible IOMMU > state. > > Note that as part of invalidate_all_devices() we do invalidate DTEs > for devices assigned to pseudo-domids, hence it seems natural that we > also flush such pseudo-domids. > >>> While there fix invalidate_all_domain_pages() to only attempt to flush >>> the domains that have IOMMU enabled, otherwise the flush is pointless. >> >> For the moment at least it looks to me as if this change alone wants >> to go in. > > I would rather get the current patch with an added call to flush > dom_io in invalidate_all_domain_pages(). The question is: Is there anything that needs flushing for the quarantine domain? Right now I'm thinking that there isn't. 
Jan
Re: [PATCH v4 07/34] mm: Convert ptlock_alloc() to use ptdescs
On Mon, Jun 12, 2023 at 02:03:56PM -0700, Vishal Moola (Oracle) wrote: > This removes some direct accesses to struct page, working towards > splitting out struct ptdesc from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm.h | 6 +++--- > mm/memory.c| 4 ++-- > 2 files changed, 5 insertions(+), 5 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 088b7664f897..e6f1be2a405e 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2825,7 +2825,7 @@ static inline void pagetable_clear(void *x) > #if USE_SPLIT_PTE_PTLOCKS > #if ALLOC_SPLIT_PTLOCKS > void __init ptlock_cache_init(void); > -extern bool ptlock_alloc(struct page *page); > +bool ptlock_alloc(struct ptdesc *ptdesc); > extern void ptlock_free(struct page *page); > > static inline spinlock_t *ptlock_ptr(struct page *page) > @@ -2837,7 +2837,7 @@ static inline void ptlock_cache_init(void) > { > } > > -static inline bool ptlock_alloc(struct page *page) > +static inline bool ptlock_alloc(struct ptdesc *ptdesc) > { > return true; > } > @@ -2867,7 +2867,7 @@ static inline bool ptlock_init(struct page *page) >* slab code uses page->slab_cache, which share storage with page->ptl. >*/ > VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); > - if (!ptlock_alloc(page)) > + if (!ptlock_alloc(page_ptdesc(page))) > return false; > spin_lock_init(ptlock_ptr(page)); > return true; > diff --git a/mm/memory.c b/mm/memory.c > index 80ce9dda2779..ba9579117686 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -5934,14 +5934,14 @@ void __init ptlock_cache_init(void) > SLAB_PANIC, NULL); > } > > -bool ptlock_alloc(struct page *page) > +bool ptlock_alloc(struct ptdesc *ptdesc) > { > spinlock_t *ptl; > > ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); > if (!ptl) > return false; > - page->ptl = ptl; > + ptdesc->ptl = ptl; > return true; > } > > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 08/34] mm: Convert ptlock_ptr() to use ptdescs
On Mon, Jun 12, 2023 at 02:03:57PM -0700, Vishal Moola (Oracle) wrote: > This removes some direct accesses to struct page, working towards > splitting out struct ptdesc from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/x86/xen/mmu_pv.c | 2 +- > include/linux/mm.h| 14 +++--- > 2 files changed, 8 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c > index b3b8d289b9ab..f469862e3ef4 100644 > --- a/arch/x86/xen/mmu_pv.c > +++ b/arch/x86/xen/mmu_pv.c > @@ -651,7 +651,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct > mm_struct *mm) > spinlock_t *ptl = NULL; > > #if USE_SPLIT_PTE_PTLOCKS > - ptl = ptlock_ptr(page); > + ptl = ptlock_ptr(page_ptdesc(page)); > spin_lock_nest_lock(ptl, &mm->page_table_lock); > #endif > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index e6f1be2a405e..bb934d51390f 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2828,9 +2828,9 @@ void __init ptlock_cache_init(void); > bool ptlock_alloc(struct ptdesc *ptdesc); > extern void ptlock_free(struct page *page); > > -static inline spinlock_t *ptlock_ptr(struct page *page) > +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) > { > - return page->ptl; > + return ptdesc->ptl; > } > #else /* ALLOC_SPLIT_PTLOCKS */ > static inline void ptlock_cache_init(void) > @@ -2846,15 +2846,15 @@ static inline void ptlock_free(struct page *page) > { > } > > -static inline spinlock_t *ptlock_ptr(struct page *page) > +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) > { > - return &page->ptl; > + return &ptdesc->ptl; > } > #endif /* ALLOC_SPLIT_PTLOCKS */ > > static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) > { > - return ptlock_ptr(pmd_page(*pmd)); > + return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); > } > > static inline bool ptlock_init(struct page *page) > @@ -2869,7 +2869,7 @@ static inline bool ptlock_init(struct page *page) > 
VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); > if (!ptlock_alloc(page_ptdesc(page))) > return false; > - spin_lock_init(ptlock_ptr(page)); > + spin_lock_init(ptlock_ptr(page_ptdesc(page))); > return true; > } > > @@ -2939,7 +2939,7 @@ static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) > > static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) > { > - return ptlock_ptr(ptdesc_page(pmd_ptdesc(pmd))); > + return ptlock_ptr(pmd_ptdesc(pmd)); > } > > static inline bool pmd_ptlock_init(struct page *page) > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 09/34] mm: Convert pmd_ptlock_init() to use ptdescs
On Mon, Jun 12, 2023 at 02:03:58PM -0700, Vishal Moola (Oracle) wrote: > This removes some direct accesses to struct page, working towards > splitting out struct ptdesc from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm.h | 10 +- > 1 file changed, 5 insertions(+), 5 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index bb934d51390f..daecf1db6cf1 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2942,12 +2942,12 @@ static inline spinlock_t *pmd_lockptr(struct > mm_struct *mm, pmd_t *pmd) > return ptlock_ptr(pmd_ptdesc(pmd)); > } > > -static inline bool pmd_ptlock_init(struct page *page) > +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) > { > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > - page->pmd_huge_pte = NULL; > + ptdesc->pmd_huge_pte = NULL; > #endif > - return ptlock_init(page); > + return ptlock_init(ptdesc_page(ptdesc)); > } > > static inline void pmd_ptlock_free(struct page *page) > @@ -2967,7 +2967,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct > *mm, pmd_t *pmd) > return &mm->page_table_lock; > } > > -static inline bool pmd_ptlock_init(struct page *page) { return true; } > +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } > static inline void pmd_ptlock_free(struct page *page) {} > > #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) > @@ -2983,7 +2983,7 @@ static inline spinlock_t *pmd_lock(struct mm_struct > *mm, pmd_t *pmd) > > static inline bool pgtable_pmd_page_ctor(struct page *page) > { > - if (!pmd_ptlock_init(page)) > + if (!pmd_ptlock_init(page_ptdesc(page))) > return false; > __SetPageTable(page); > inc_lruvec_page_state(page, NR_PAGETABLE); > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 10/34] mm: Convert ptlock_init() to use ptdescs
On Mon, Jun 12, 2023 at 02:03:59PM -0700, Vishal Moola (Oracle) wrote: > This removes some direct accesses to struct page, working towards > splitting out struct ptdesc from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm.h | 14 +++--- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index daecf1db6cf1..f48e626d9c98 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2857,7 +2857,7 @@ static inline spinlock_t *pte_lockptr(struct mm_struct > *mm, pmd_t *pmd) > return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); > } > > -static inline bool ptlock_init(struct page *page) > +static inline bool ptlock_init(struct ptdesc *ptdesc) > { > /* >* prep_new_page() initialize page->private (and therefore page->ptl) > @@ -2866,10 +2866,10 @@ static inline bool ptlock_init(struct page *page) >* It can happen if arch try to use slab for page table allocation: >* slab code uses page->slab_cache, which share storage with page->ptl. 
>*/ > - VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); > - if (!ptlock_alloc(page_ptdesc(page))) > + VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc)); > + if (!ptlock_alloc(ptdesc)) > return false; > - spin_lock_init(ptlock_ptr(page_ptdesc(page))); > + spin_lock_init(ptlock_ptr(ptdesc)); > return true; > } > > @@ -2882,13 +2882,13 @@ static inline spinlock_t *pte_lockptr(struct > mm_struct *mm, pmd_t *pmd) > return &mm->page_table_lock; > } > static inline void ptlock_cache_init(void) {} > -static inline bool ptlock_init(struct page *page) { return true; } > +static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } > static inline void ptlock_free(struct page *page) {} > #endif /* USE_SPLIT_PTE_PTLOCKS */ > > static inline bool pgtable_pte_page_ctor(struct page *page) > { > - if (!ptlock_init(page)) > + if (!ptlock_init(page_ptdesc(page))) > return false; > __SetPageTable(page); > inc_lruvec_page_state(page, NR_PAGETABLE); > @@ -2947,7 +2947,7 @@ static inline bool pmd_ptlock_init(struct ptdesc > *ptdesc) > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > ptdesc->pmd_huge_pte = NULL; > #endif > - return ptlock_init(ptdesc_page(ptdesc)); > + return ptlock_init(ptdesc); > } > > static inline void pmd_ptlock_free(struct page *page) > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 11/34] mm: Convert pmd_ptlock_free() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:00PM -0700, Vishal Moola (Oracle) wrote: > This removes some direct accesses to struct page, working towards > splitting out struct ptdesc from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm.h | 10 +- > 1 file changed, 5 insertions(+), 5 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index f48e626d9c98..3b54bb4c9753 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2950,12 +2950,12 @@ static inline bool pmd_ptlock_init(struct ptdesc > *ptdesc) > return ptlock_init(ptdesc); > } > > -static inline void pmd_ptlock_free(struct page *page) > +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) > { > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > - VM_BUG_ON_PAGE(page->pmd_huge_pte, page); > + VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); > #endif > - ptlock_free(page); > + ptlock_free(ptdesc_page(ptdesc)); > } > > #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) > @@ -2968,7 +2968,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct > *mm, pmd_t *pmd) > } > > static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } > -static inline void pmd_ptlock_free(struct page *page) {} > +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {} > > #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) > > @@ -2992,7 +2992,7 @@ static inline bool pgtable_pmd_page_ctor(struct page > *page) > > static inline void pgtable_pmd_page_dtor(struct page *page) > { > - pmd_ptlock_free(page); > + pmd_ptlock_free(page_ptdesc(page)); > __ClearPageTable(page); > dec_lruvec_page_state(page, NR_PAGETABLE); > } > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 12/34] mm: Convert ptlock_free() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:01PM -0700, Vishal Moola (Oracle) wrote: > This removes some direct accesses to struct page, working towards > splitting out struct ptdesc from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm.h | 10 +- > mm/memory.c| 4 ++-- > 2 files changed, 7 insertions(+), 7 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 3b54bb4c9753..a1af7983e1bd 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2826,7 +2826,7 @@ static inline void pagetable_clear(void *x) > #if ALLOC_SPLIT_PTLOCKS > void __init ptlock_cache_init(void); > bool ptlock_alloc(struct ptdesc *ptdesc); > -extern void ptlock_free(struct page *page); > +void ptlock_free(struct ptdesc *ptdesc); > > static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) > { > @@ -2842,7 +2842,7 @@ static inline bool ptlock_alloc(struct ptdesc *ptdesc) > return true; > } > > -static inline void ptlock_free(struct page *page) > +static inline void ptlock_free(struct ptdesc *ptdesc) > { > } > > @@ -2883,7 +2883,7 @@ static inline spinlock_t *pte_lockptr(struct mm_struct > *mm, pmd_t *pmd) > } > static inline void ptlock_cache_init(void) {} > static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } > -static inline void ptlock_free(struct page *page) {} > +static inline void ptlock_free(struct ptdesc *ptdesc) {} > #endif /* USE_SPLIT_PTE_PTLOCKS */ > > static inline bool pgtable_pte_page_ctor(struct page *page) > @@ -2897,7 +2897,7 @@ static inline bool pgtable_pte_page_ctor(struct page > *page) > > static inline void pgtable_pte_page_dtor(struct page *page) > { > - ptlock_free(page); > + ptlock_free(page_ptdesc(page)); > __ClearPageTable(page); > dec_lruvec_page_state(page, NR_PAGETABLE); > } > @@ -2955,7 +2955,7 @@ static inline void pmd_ptlock_free(struct ptdesc > *ptdesc) > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); > #endif > - 
ptlock_free(ptdesc_page(ptdesc)); > + ptlock_free(ptdesc); > } > > #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) > diff --git a/mm/memory.c b/mm/memory.c > index ba9579117686..d4d2ea5cf0fd 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -5945,8 +5945,8 @@ bool ptlock_alloc(struct ptdesc *ptdesc) > return true; > } > > -void ptlock_free(struct page *page) > +void ptlock_free(struct ptdesc *ptdesc) > { > - kmem_cache_free(page_ptl_cachep, page->ptl); > + kmem_cache_free(page_ptl_cachep, ptdesc->ptl); > } > #endif > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: Functions _spin_lock_cb() and handle_ro_raz()
On 14.06.2023 15:08, Federico Serafini wrote: > Hello everyone, > > I am working on the violations of MISRA C:2012 Rule 8.10, > whose headline says: > "An inline function shall be declared with the static storage class". > > For both ARM64 and X86_64 builds, > function _spin_lock_cb() defined in spinlock.c violates the rule. > Such function is declared in spinlock.h without > the inline function specifier: are there any reasons to do this? Since this function was mentioned elsewhere already, I'm afraid I have to be a little blunt and ask back: Did you check the history of the function? Yes, it is intentional to be that way, for the function to be inlined into _spin_lock(), and for it to also be available for external callers (we have just one right now, but that could change). > What about solving the violation by moving the function definition in > spinlock.h and declaring it as static inline? Did you try whether that would work at least purely mechanically? I'm afraid you'll find that it doesn't, because of LOCK_PROFILE_* being unavailable then. Yet we also don't want to expose all that in the header. In the earlier context I did suggest already to make the function an always-inline one in spinlock.c, under a slightly altered name, and then have _spin_lock_cb() be a trivial wrapper just like _spin_lock() is. I guess best is going to be if I make and post a patch ... Jan
Re: [PATCH v4 13/34] mm: Create ptdesc equivalents for pgtable_{pte,pmd}_page_{ctor,dtor}
On Mon, Jun 12, 2023 at 02:04:02PM -0700, Vishal Moola (Oracle) wrote: > Creates pagetable_pte_ctor(), pagetable_pmd_ctor(), pagetable_pte_dtor(), > and pagetable_pmd_dtor() and make the original pgtable > constructor/destructors wrappers. Nit: either "creates ... makes" or "create ... make" I like the second form more. > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm.h | 56 ++ > 1 file changed, 42 insertions(+), 14 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index a1af7983e1bd..dc211c43610b 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2886,20 +2886,34 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) > { return true; } > static inline void ptlock_free(struct ptdesc *ptdesc) {} > #endif /* USE_SPLIT_PTE_PTLOCKS */ > > -static inline bool pgtable_pte_page_ctor(struct page *page) > +static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) > { > - if (!ptlock_init(page_ptdesc(page))) > + struct folio *folio = ptdesc_folio(ptdesc); > + > + if (!ptlock_init(ptdesc)) > return false; > - __SetPageTable(page); > - inc_lruvec_page_state(page, NR_PAGETABLE); > + __folio_set_table(folio); This comment is more to patch 1 ("mm: Add PAGE_TYPE_OP folio functions") It would be better to have _pgtable here, as "table" does not necessary mean page table. With PageType SetPageTable was fine, but with folio I think it should be more explicit. I'd add a third parameter to PAGE_TYPE_OPS for that. 
> + lruvec_stat_add_folio(folio, NR_PAGETABLE); > return true; > } > > +static inline bool pgtable_pte_page_ctor(struct page *page) > +{ > + return pagetable_pte_ctor(page_ptdesc(page)); > +} > + > +static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) > +{ > + struct folio *folio = ptdesc_folio(ptdesc); > + > + ptlock_free(ptdesc); > + __folio_clear_table(folio); > + lruvec_stat_sub_folio(folio, NR_PAGETABLE); > +} > + > static inline void pgtable_pte_page_dtor(struct page *page) > { > - ptlock_free(page_ptdesc(page)); > - __ClearPageTable(page); > - dec_lruvec_page_state(page, NR_PAGETABLE); > + pagetable_pte_dtor(page_ptdesc(page)); > } > > #define pte_offset_map_lock(mm, pmd, address, ptlp) \ > @@ -2981,20 +2995,34 @@ static inline spinlock_t *pmd_lock(struct mm_struct > *mm, pmd_t *pmd) > return ptl; > } > > -static inline bool pgtable_pmd_page_ctor(struct page *page) > +static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) > { > - if (!pmd_ptlock_init(page_ptdesc(page))) > + struct folio *folio = ptdesc_folio(ptdesc); > + > + if (!pmd_ptlock_init(ptdesc)) > return false; > - __SetPageTable(page); > - inc_lruvec_page_state(page, NR_PAGETABLE); > + __folio_set_table(folio); > + lruvec_stat_add_folio(folio, NR_PAGETABLE); > return true; > } > > +static inline bool pgtable_pmd_page_ctor(struct page *page) > +{ > + return pagetable_pmd_ctor(page_ptdesc(page)); > +} > + > +static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc) > +{ > + struct folio *folio = ptdesc_folio(ptdesc); > + > + pmd_ptlock_free(ptdesc); > + __folio_clear_table(folio); > + lruvec_stat_sub_folio(folio, NR_PAGETABLE); > +} > + > static inline void pgtable_pmd_page_dtor(struct page *page) > { > - pmd_ptlock_free(page_ptdesc(page)); > - __ClearPageTable(page); > - dec_lruvec_page_state(page, NR_PAGETABLE); > + pagetable_pmd_dtor(page_ptdesc(page)); > } > > /* > -- > 2.40.1 > > -- Sincerely yours, Mike.
[PATCH] spinlock: alter inlining of _spin_lock_cb()
To comply with Misra rule 8.10 ("An inline function shall be declared with the static storage class"), convert what is presently _spin_lock_cb() to an always-inline (and static) helper, while making the function itself a thin wrapper, just like _spin_lock() is. While there drop the unlikely() from the callback check, and correct indentation in _spin_lock(). Signed-off-by: Jan Beulich --- a/xen/common/spinlock.c +++ b/xen/common/spinlock.c @@ -304,7 +304,8 @@ static always_inline u16 observe_head(sp return read_atomic(&t->head); } -void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data) +static void always_inline spin_lock_common(spinlock_t *lock, + void (*cb)(void *), void *data) { spinlock_tickets_t tickets = SPINLOCK_TICKET_INC; LOCK_PROFILE_VAR; @@ -316,7 +317,7 @@ void inline _spin_lock_cb(spinlock_t *lo while ( tickets.tail != observe_head(&lock->tickets) ) { LOCK_PROFILE_BLOCK; -if ( unlikely(cb) ) +if ( cb ) cb(data); arch_lock_relax(); } @@ -327,7 +328,12 @@ void inline _spin_lock_cb(spinlock_t *lo void _spin_lock(spinlock_t *lock) { - _spin_lock_cb(lock, NULL, NULL); +spin_lock_common(lock, NULL, NULL); +} + +void _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data) +{ +spin_lock_common(lock, cb, data); } void _spin_lock_irq(spinlock_t *lock)
Re: [PATCH v4 14/34] powerpc: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:03PM -0700, Vishal Moola (Oracle) wrote: > In order to split struct ptdesc from struct page, convert various > functions to use ptdescs. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/powerpc/mm/book3s64/mmu_context.c | 10 +++--- > arch/powerpc/mm/book3s64/pgtable.c | 32 +- > arch/powerpc/mm/pgtable-frag.c | 46 +- > 3 files changed, 44 insertions(+), 44 deletions(-) > > diff --git a/arch/powerpc/mm/book3s64/mmu_context.c > b/arch/powerpc/mm/book3s64/mmu_context.c > index c766e4c26e42..1715b07c630c 100644 > --- a/arch/powerpc/mm/book3s64/mmu_context.c > +++ b/arch/powerpc/mm/book3s64/mmu_context.c > @@ -246,15 +246,15 @@ static void destroy_contexts(mm_context_t *ctx) > static void pmd_frag_destroy(void *pmd_frag) > { > int count; > - struct page *page; > + struct ptdesc *ptdesc; > > - page = virt_to_page(pmd_frag); > + ptdesc = virt_to_ptdesc(pmd_frag); > /* drop all the pending references */ > count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT; > /* We allow PTE_FRAG_NR fragments from a PTE page */ > - if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) { > - pgtable_pmd_page_dtor(page); > - __free_page(page); > + if (atomic_sub_and_test(PMD_FRAG_NR - count, > &ptdesc->pt_frag_refcount)) { > + pagetable_pmd_dtor(ptdesc); > + pagetable_free(ptdesc); > } > } > > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > b/arch/powerpc/mm/book3s64/pgtable.c > index 85c84e89e3ea..1212deeabe15 100644 > --- a/arch/powerpc/mm/book3s64/pgtable.c > +++ b/arch/powerpc/mm/book3s64/pgtable.c > @@ -306,22 +306,22 @@ static pmd_t *get_pmd_from_cache(struct mm_struct *mm) > static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) > { > void *ret = NULL; > - struct page *page; > + struct ptdesc *ptdesc; > gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO; > > if (mm == &init_mm) > gfp &= ~__GFP_ACCOUNT; > - page = alloc_page(gfp); > - if (!page) > + ptdesc = pagetable_alloc(gfp, 0); > + if 
(!ptdesc) > return NULL; > - if (!pgtable_pmd_page_ctor(page)) { > - __free_pages(page, 0); > + if (!pagetable_pmd_ctor(ptdesc)) { > + pagetable_free(ptdesc); > return NULL; > } > > - atomic_set(&page->pt_frag_refcount, 1); > + atomic_set(&ptdesc->pt_frag_refcount, 1); > > - ret = page_address(page); > + ret = ptdesc_address(ptdesc); > /* >* if we support only one fragment just return the >* allocated page. > @@ -331,12 +331,12 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) > > spin_lock(&mm->page_table_lock); > /* > - * If we find pgtable_page set, we return > + * If we find ptdesc_page set, we return >* the allocated page with single fragment >* count. >*/ > if (likely(!mm->context.pmd_frag)) { > - atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR); > + atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR); > mm->context.pmd_frag = ret + PMD_FRAG_SIZE; > } > spin_unlock(&mm->page_table_lock); > @@ -357,15 +357,15 @@ pmd_t *pmd_fragment_alloc(struct mm_struct *mm, > unsigned long vmaddr) > > void pmd_fragment_free(unsigned long *pmd) > { > - struct page *page = virt_to_page(pmd); > + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); > > - if (PageReserved(page)) > - return free_reserved_page(page); > + if (pagetable_is_reserved(ptdesc)) > + return free_reserved_ptdesc(ptdesc); > > - BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); > - if (atomic_dec_and_test(&page->pt_frag_refcount)) { > - pgtable_pmd_page_dtor(page); > - __free_page(page); > + BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0); > + if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) { > + pagetable_pmd_dtor(ptdesc); > + pagetable_free(ptdesc); > } > } > > diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c > index 20652daa1d7e..8961f1540209 100644 > --- a/arch/powerpc/mm/pgtable-frag.c > +++ b/arch/powerpc/mm/pgtable-frag.c > @@ -18,15 +18,15 @@ > void pte_frag_destroy(void *pte_frag) > { > int count; > - struct page *page; > + struct ptdesc *ptdesc; > > - page = 
virt_to_page(pte_frag); > + ptdesc = virt_to_ptdesc(pte_frag); > /* drop all the pending references */ > count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; > /* We allow PTE_FRAG_NR fragments from a PTE page */ > - if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) { > - pgtable_pte_page_dtor(page); > - __free_page(page);
[xen-unstable-smoke test] 181426: tolerable all pass - PUSHED
flight 181426 xen-unstable-smoke real [real] http://logs.test-lab.xenproject.org/osstest/logs/181426/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-xl-xsm 16 saverestore-support-checkfail never pass test-armhf-armhf-xl 15 migrate-support-checkfail never pass test-armhf-armhf-xl 16 saverestore-support-checkfail never pass version targeted for testing: xen 87c621d0ef75e5f95987d66811ed1fd7129208d1 baseline version: xen 2f69ef96801f0d2b9646abf6396e60f99c56e3a0 Last test of basis 181407 2023-06-13 14:00:28 Z1 days Testing same since 181426 2023-06-14 11:02:37 Z0 days1 attempts People who touched revisions under test: Anthony PERARD Jan Beulich jobs: build-arm64-xsm pass build-amd64 pass build-armhf pass build-amd64-libvirt pass test-armhf-armhf-xl pass test-arm64-arm64-xl-xsm pass test-amd64-amd64-xl-qemuu-debianhvm-amd64pass test-amd64-amd64-libvirt pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in general, is at http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master Test harness code can be found at http://xenbits.xen.org/gitweb?p=osstest.git;a=summary Pushing revision : To xenbits.xen.org:/home/xen/git/xen.git 2f69ef9680..87c621d0ef 87c621d0ef75e5f95987d66811ed1fd7129208d1 -> smoke
Re: [PATCH v4 15/34] x86: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:04PM -0700, Vishal Moola (Oracle) wrote: > In order to split struct ptdesc from struct page, convert various > functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert Nit: *get_free_page*() > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. More importantly, get_free_pages() ensures a page won't be allocated from HIGHMEM, and for 32-bits this is a must. > Signed-off-by: Vishal Moola (Oracle) > --- > arch/x86/mm/pgtable.c | 46 +-- > 1 file changed, 27 insertions(+), 19 deletions(-) > > diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c > index 15a8009a4480..6da7fd5d4782 100644 > --- a/arch/x86/mm/pgtable.c > +++ b/arch/x86/mm/pgtable.c > @@ -52,7 +52,7 @@ early_param("userpte", setup_userpte); > > void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) > { > - pgtable_pte_page_dtor(pte); > + pagetable_pte_dtor(page_ptdesc(pte)); > paravirt_release_pte(page_to_pfn(pte)); > paravirt_tlb_remove_table(tlb, pte); > } > @@ -60,7 +60,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page > *pte) > #if CONFIG_PGTABLE_LEVELS > 2 > void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) > { > - struct page *page = virt_to_page(pmd); > + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); > paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); > /* >* NOTE! 
For PAE, any changes to the top page-directory-pointer-table > @@ -69,8 +69,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) > #ifdef CONFIG_X86_PAE > tlb->need_flush_all = 1; > #endif > - pgtable_pmd_page_dtor(page); > - paravirt_tlb_remove_table(tlb, page); > + pagetable_pmd_dtor(ptdesc); > + paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc)); > } > > #if CONFIG_PGTABLE_LEVELS > 3 > @@ -92,16 +92,16 @@ void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) > > static inline void pgd_list_add(pgd_t *pgd) > { > - struct page *page = virt_to_page(pgd); > + struct ptdesc *ptdesc = virt_to_ptdesc(pgd); > > - list_add(&page->lru, &pgd_list); > + list_add(&ptdesc->pt_list, &pgd_list); > } > > static inline void pgd_list_del(pgd_t *pgd) > { > - struct page *page = virt_to_page(pgd); > + struct ptdesc *ptdesc = virt_to_ptdesc(pgd); > > - list_del(&page->lru); > + list_del(&ptdesc->pt_list); > } > > #define UNSHARED_PTRS_PER_PGD\ > @@ -112,12 +112,12 @@ static inline void pgd_list_del(pgd_t *pgd) > > static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) > { > - virt_to_page(pgd)->pt_mm = mm; > + virt_to_ptdesc(pgd)->pt_mm = mm; > } > > struct mm_struct *pgd_page_get_mm(struct page *page) > { > - return page->pt_mm; > + return page_ptdesc(page)->pt_mm; > } > > static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) > @@ -213,11 +213,14 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, > pmd_t *pmd) > static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) > { > int i; > + struct ptdesc *ptdesc; > > for (i = 0; i < count; i++) > if (pmds[i]) { > - pgtable_pmd_page_dtor(virt_to_page(pmds[i])); > - free_page((unsigned long)pmds[i]); > + ptdesc = virt_to_ptdesc(pmds[i]); > + > + pagetable_pmd_dtor(ptdesc); > + pagetable_free(ptdesc); > mm_dec_nr_pmds(mm); > } > } > @@ -232,16 +235,21 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t > *pmds[], int count) > gfp &= ~__GFP_ACCOUNT; > > for (i = 0; i < count; i++) { > - pmd_t *pmd = 
(pmd_t *)__get_free_page(gfp); > - if (!pmd) > + pmd_t *pmd = NULL; > + struct ptdesc *ptdesc = pagetable_alloc(gfp, 0); > + > + if (!ptdesc) > failed = true; > - if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) { > - free_page((unsigned long)pmd); > - pmd = NULL; > + if (ptdesc && !pagetable_pmd_ctor(ptdesc)) { > + pagetable_free(ptdesc); > + ptdesc = NULL; > failed = true; > } > - if (pmd) > + if (ptdesc) { > mm_inc_nr_pmds(mm); > + pmd = ptdesc_address(ptdesc); > + } > + > pmds[i] = pmd; > } > > @@ -830,7 +838,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) > > free_page((unsigned long)pmd_sv); > > - pgtable_pmd_page_dtor(virt_to_page(pmd)); > + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); > free_page((unsigned long)pmd); > > return 1; > -- > 2.40.1 > >
Re: [XEN PATCH] xen: fixed violations of MISRA C:2012 Rule 3.1
On 13/06/2023 8:42 am, Nicola Vetrini wrote: > diff --git a/xen/common/xmalloc_tlsf.c b/xen/common/xmalloc_tlsf.c > index 75bdf18c4e..ea6ec47a59 100644 > --- a/xen/common/xmalloc_tlsf.c > +++ b/xen/common/xmalloc_tlsf.c > @@ -140,9 +140,10 @@ static inline void MAPPING_SEARCH(unsigned long *r, int > *fl, int *sl) > *fl = flsl(*r) - 1; > *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; > *fl -= FLI_OFFSET; > -/*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! > - *fl = *sl = 0; > - */ > +#if 0 > +if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! > +fl = *sl = 0; > +#endif > *r &= ~t; > } > } This logic has been commented out right from its introduction in c/s 9736b76d829b2d in 2008, and never touched since. I think it can safely be deleted, and not placed inside an #if 0. ~Andrew
[libvirt test] 181418: tolerable all pass - PUSHED
flight 181418 libvirt real [real] http://logs.test-lab.xenproject.org/osstest/logs/181418/ Failures :-/ but no regressions. Tests which did not succeed, but are not blocking: test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 181374 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 181374 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 181374 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail never pass test-arm64-arm64-libvirt 15 migrate-support-checkfail never pass test-arm64-arm64-libvirt 16 saverestore-support-checkfail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-i386-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-qcow2 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-qcow2 15 saverestore-support-checkfail never pass test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt 15 migrate-support-checkfail never pass test-armhf-armhf-libvirt-qcow2 14 migrate-support-checkfail never pass test-armhf-armhf-libvirt-raw 14 migrate-support-checkfail never pass version targeted for testing: libvirt a7ee9eac835324854483a231d7931b9329f259bc baseline version: libvirt 97f0bd00b4d055f2329392d2f8b7fe566fc65901 Last test of basis 181374 2023-06-11 04:18:49 Z3 days Testing same since 181401 2023-06-13 04:20:27 Z1 
days2 attempts People who touched revisions under test: Ján Tomko Michal Privoznik jobs: build-amd64-xsm pass build-arm64-xsm pass build-i386-xsm pass build-amd64 pass build-arm64 pass build-armhf pass build-i386 pass build-amd64-libvirt pass build-arm64-libvirt pass build-armhf-libvirt pass build-i386-libvirt pass build-amd64-pvopspass build-arm64-pvopspass build-armhf-pvopspass build-i386-pvops pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsmpass test-amd64-amd64-libvirt-xsm pass test-arm64-arm64-libvirt-xsm pass test-amd64-i386-libvirt-xsm pass test-amd64-amd64-libvirt pass test-arm64-arm64-libvirt pass test-armhf-armhf-libvirt pass test-amd64-i386-libvirt pass test-amd64-amd64-libvirt-pairpass test-amd64-i386-libvirt-pair pass test-arm64-arm64-libvirt-qcow2 pass test-armhf-armhf-libvirt-qcow2 pass test-arm64-arm64-libvirt-raw pass test-armhf-armhf-libvirt-raw pass test-amd64-i386-libvirt-raw pass test-amd64-amd64-libvirt-vhd pass sg-report-flight on osstest.test-lab.xenproject.org logs: /home/logs/logs images: /home/logs/images Logs, config files, etc. are available at http://logs.test-lab.xenproject.org/osstest/logs Explanation of these reports, and of osstest in
Re: [PATCH v4 16/34] s390: Convert various gmap functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:05PM -0700, Vishal Moola (Oracle) wrote: > In order to split struct ptdesc from struct page, convert various > functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) With folding ptdesc->_pt_s390_gaddr = 0; into pagetable_free() Acked-by: Mike Rapoport (IBM) > --- > arch/s390/mm/gmap.c | 230 > 1 file changed, 128 insertions(+), 102 deletions(-) > > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index 81c683426b49..010e87df7299 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -34,7 +34,7 @@ > static struct gmap *gmap_alloc(unsigned long limit) > { > struct gmap *gmap; > - struct page *page; > + struct ptdesc *ptdesc; > unsigned long *table; > unsigned long etype, atype; > > @@ -67,12 +67,12 @@ static struct gmap *gmap_alloc(unsigned long limit) > spin_lock_init(&gmap->guest_table_lock); > spin_lock_init(&gmap->shadow_lock); > refcount_set(&gmap->ref_count, 1); > - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); > - if (!page) > + ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); > + if (!ptdesc) > goto out_free; > - page->_pt_s390_gaddr = 0; > - list_add(&page->lru, &gmap->crst_list); > - table = page_to_virt(page); > + ptdesc->_pt_s390_gaddr = 0; > + list_add(&ptdesc->pt_list, &gmap->crst_list); > + table = ptdesc_to_virt(ptdesc); > crst_table_init(table, etype); > gmap->table = table; > gmap->asce = atype | _ASCE_TABLE_LENGTH | > @@ -181,25 +181,25 @@ static void gmap_rmap_radix_tree_free(struct > radix_tree_root *root) > */ > static void gmap_free(struct gmap *gmap) > { > - struct page *page, *next; > + struct ptdesc *ptdesc, *next; > > /* Flush tlb of all gmaps (if not already done for shadows) */ > if (!(gmap_is_shadow(gmap) && gmap->removed)) > gmap_flush_tlb(gmap); > /* Free all segment & 
region tables. */ > - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { > - page->_pt_s390_gaddr = 0; > - __free_pages(page, CRST_ALLOC_ORDER); > + list_for_each_entry_safe(ptdesc, next, &gmap->crst_list, pt_list) { > + ptdesc->_pt_s390_gaddr = 0; > + pagetable_free(ptdesc); > } > gmap_radix_tree_free(&gmap->guest_to_host); > gmap_radix_tree_free(&gmap->host_to_guest); > > /* Free additional data for a shadow gmap */ > if (gmap_is_shadow(gmap)) { > - /* Free all page tables. */ > - list_for_each_entry_safe(page, next, &gmap->pt_list, lru) { > - page->_pt_s390_gaddr = 0; > - page_table_free_pgste(page); > + /* Free all ptdesc tables. */ > + list_for_each_entry_safe(ptdesc, next, &gmap->pt_list, pt_list) > { > + ptdesc->_pt_s390_gaddr = 0; > + page_table_free_pgste(ptdesc_page(ptdesc)); > } > gmap_rmap_radix_tree_free(&gmap->host_to_rmap); > /* Release reference to the parent */ > @@ -308,27 +308,27 @@ EXPORT_SYMBOL_GPL(gmap_get_enabled); > static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, > unsigned long init, unsigned long gaddr) > { > - struct page *page; > + struct ptdesc *ptdesc; > unsigned long *new; > > /* since we dont free the gmap table until gmap_free we can unlock */ > - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); > - if (!page) > + ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); > + if (!ptdesc) > return -ENOMEM; > - new = page_to_virt(page); > + new = ptdesc_to_virt(ptdesc); > crst_table_init(new, init); > spin_lock(&gmap->guest_table_lock); > if (*table & _REGION_ENTRY_INVALID) { > - list_add(&page->lru, &gmap->crst_list); > + list_add(&ptdesc->pt_list, &gmap->crst_list); > *table = __pa(new) | _REGION_ENTRY_LENGTH | > (*table & _REGION_ENTRY_TYPE_MASK); > - page->_pt_s390_gaddr = gaddr; > - page = NULL; > + ptdesc->_pt_s390_gaddr = gaddr; > + ptdesc = NULL; > } > spin_unlock(&gmap->guest_table_lock); > - if (page) { > - page->_pt_s390_gaddr = 0; > - __free_pages(page, CRST_ALLOC_ORDER); > + 
if (ptdesc) { > + ptdesc->_pt_s390_gaddr = 0; > + pagetable_free(ptdesc); > } > return 0; > } > @@ -341,15 +341,15 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned > long *table, > */ > static unsigned
Re: [PATCH v4 17/34] s390: Convert various pgalloc functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:06PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/s390/include/asm/pgalloc.h | 4 +- > arch/s390/include/asm/tlb.h | 4 +- > arch/s390/mm/pgalloc.c | 108 > 3 files changed, 59 insertions(+), 57 deletions(-) > > diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h > index 17eb618f1348..00ad9b88fda9 100644 > --- a/arch/s390/include/asm/pgalloc.h > +++ b/arch/s390/include/asm/pgalloc.h > @@ -86,7 +86,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, > unsigned long vmaddr) > if (!table) > return NULL; > crst_table_init(table, _SEGMENT_ENTRY_EMPTY); > - if (!pgtable_pmd_page_ctor(virt_to_page(table))) { > + if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { > crst_table_free(mm, table); > return NULL; > } > @@ -97,7 +97,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t > *pmd) > { > if (mm_pmd_folded(mm)) > return; > - pgtable_pmd_page_dtor(virt_to_page(pmd)); > + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); > crst_table_free(mm, (unsigned long *) pmd); > } > > diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h > index b91f4a9b044c..383b1f91442c 100644 > --- a/arch/s390/include/asm/tlb.h > +++ b/arch/s390/include/asm/tlb.h > @@ -89,12 +89,12 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, > pmd_t *pmd, > { > if (mm_pmd_folded(tlb->mm)) > return; > - pgtable_pmd_page_dtor(virt_to_page(pmd)); > + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); > __tlb_adjust_range(tlb, address, PAGE_SIZE); > tlb->mm->context.flush_mm = 1; > tlb->freed_tables = 1; > tlb->cleared_puds = 1; > - 
tlb_remove_table(tlb, pmd); > + tlb_remove_ptdesc(tlb, pmd); > } > > /* > diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c > index 6b99932abc66..eeb7c95b98cf 100644 > --- a/arch/s390/mm/pgalloc.c > +++ b/arch/s390/mm/pgalloc.c > @@ -43,17 +43,17 @@ __initcall(page_table_register_sysctl); > > unsigned long *crst_table_alloc(struct mm_struct *mm) > { > - struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); > > - if (!page) > + if (!ptdesc) > return NULL; > - arch_set_page_dat(page, CRST_ALLOC_ORDER); > - return (unsigned long *) page_to_virt(page); > + arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER); > + return (unsigned long *) ptdesc_to_virt(ptdesc); > } > > void crst_table_free(struct mm_struct *mm, unsigned long *table) > { > - free_pages((unsigned long)table, CRST_ALLOC_ORDER); > + pagetable_free(virt_to_ptdesc(table)); > } > > static void __crst_table_upgrade(void *arg) > @@ -140,21 +140,21 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, > unsigned int bits) > > struct page *page_table_alloc_pgste(struct mm_struct *mm) > { > - struct page *page; > + struct ptdesc *ptdesc; > u64 *table; > > - page = alloc_page(GFP_KERNEL); > - if (page) { > - table = (u64 *)page_to_virt(page); > + ptdesc = pagetable_alloc(GFP_KERNEL, 0); > + if (ptdesc) { > + table = (u64 *)ptdesc_to_virt(ptdesc); > memset64(table, _PAGE_INVALID, PTRS_PER_PTE); > memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); > } > - return page; > + return ptdesc_page(ptdesc); > } > > void page_table_free_pgste(struct page *page) > { > - __free_page(page); > + pagetable_free(page_ptdesc(page)); > } > > #endif /* CONFIG_PGSTE */ > @@ -230,7 +230,7 @@ void page_table_free_pgste(struct page *page) > unsigned long *page_table_alloc(struct mm_struct *mm) > { > unsigned long *table; > - struct page *page; > + struct ptdesc *ptdesc; > unsigned int mask, bit; > > /* Try to get a fragment of a 4K page 
as a 2K page table */ > @@ -238,9 +238,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm) > table = NULL; > spin_lock_bh(&mm->context.lock); > if (!list_empty(&mm->context.pgtable_list)) { > - page = list_first_entry(&mm->context.pgtable_list, > - struct page, lru); > - mask = atomic_read(&page->pt_frag_refcount); > + ptdesc = list_first_entry(&mm->context.pgtable_list, > +
Re: [PATCH v4 18/34] mm: Remove page table members from struct page
On Mon, Jun 12, 2023 at 02:04:07PM -0700, Vishal Moola (Oracle) wrote: > The page table members are now split out into their own ptdesc struct. > Remove them from struct page. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > include/linux/mm_types.h | 14 -- > include/linux/pgtable.h | 3 --- > 2 files changed, 17 deletions(-) > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index 6161fe1ae5b8..31ffa1be21d0 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -141,20 +141,6 @@ struct page { > struct {/* Tail pages of compound page */ > unsigned long compound_head;/* Bit zero is set */ > }; > - struct {/* Page table pages */ > - unsigned long _pt_pad_1;/* compound_head */ > - pgtable_t pmd_huge_pte; /* protected by page->ptl */ > - unsigned long _pt_s390_gaddr; /* mapping */ > - union { > - struct mm_struct *pt_mm; /* x86 pgds only */ > - atomic_t pt_frag_refcount; /* powerpc */ > - }; > -#if ALLOC_SPLIT_PTLOCKS > - spinlock_t *ptl; > -#else > - spinlock_t ptl; > -#endif > - }; > struct {/* ZONE_DEVICE pages */ > /** @pgmap: Points to the hosting device page map. */ > struct dev_pagemap *pgmap; > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index c405f74d3875..33cc19d752b3 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -1019,10 +1019,7 @@ struct ptdesc { > TABLE_MATCH(flags, __page_flags); > TABLE_MATCH(compound_head, pt_list); > TABLE_MATCH(compound_head, _pt_pad_1); > -TABLE_MATCH(pmd_huge_pte, pmd_huge_pte); > TABLE_MATCH(mapping, _pt_s390_gaddr); > -TABLE_MATCH(pt_mm, pt_mm); > -TABLE_MATCH(ptl, ptl); > #undef TABLE_MATCH > static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); > > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 19/34] pgalloc: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:08PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) > --- > include/asm-generic/pgalloc.h | 62 +-- > 1 file changed, 37 insertions(+), 25 deletions(-) > > diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h > index a7cf825befae..3fd6ce79e654 100644 > --- a/include/asm-generic/pgalloc.h > +++ b/include/asm-generic/pgalloc.h > @@ -18,7 +18,11 @@ > */ > static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) > { > - return (pte_t *)__get_free_page(GFP_PGTABLE_KERNEL); > + struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL, 0); > + > + if (!ptdesc) > + return NULL; > + return ptdesc_address(ptdesc); > } > > #ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL > @@ -41,7 +45,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct > *mm) > */ > static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) > { > - free_page((unsigned long)pte); > + pagetable_free(virt_to_ptdesc(pte)); > } > > /** > @@ -49,7 +53,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, > pte_t *pte) > * @mm: the mm_struct of the current context > * @gfp: GFP flags to use for the allocation > * > - * Allocates a page and runs the pgtable_pte_page_ctor(). > + * Allocates a ptdesc and runs the pagetable_pte_ctor(). Allocates memory for page table and ptdesc > * > * This function is intended for architectures that need > * anything beyond simple page allocation or must have custom GFP flags. 
The Return: description here should be fixed up > @@ -58,17 +62,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, > pte_t *pte) > */ > static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) > { > - struct page *pte; > + struct ptdesc *ptdesc; > > - pte = alloc_page(gfp); > - if (!pte) > + ptdesc = pagetable_alloc(gfp, 0); > + if (!ptdesc) > return NULL; > - if (!pgtable_pte_page_ctor(pte)) { > - __free_page(pte); > + if (!pagetable_pte_ctor(ptdesc)) { > + pagetable_free(ptdesc); > return NULL; > } > > - return pte; > + return ptdesc_page(ptdesc); > } > > #ifndef __HAVE_ARCH_PTE_ALLOC_ONE > @@ -76,7 +80,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct > *mm, gfp_t gfp) > * pte_alloc_one - allocate a page for PTE-level user page table > * @mm: the mm_struct of the current context > * > - * Allocates a page and runs the pgtable_pte_page_ctor(). > + * Allocates a ptdesc and runs the pagetable_pte_ctor(). Allocates memory for page table and ptdesc > * > * Return: `struct page` initialized as page table or %NULL on error Return: ptdesc ... > */ > @@ -98,8 +102,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct > *mm) > */ > static inline void pte_free(struct mm_struct *mm, struct page *pte_page) > { > - pgtable_pte_page_dtor(pte_page); > - __free_page(pte_page); > + struct ptdesc *ptdesc = page_ptdesc(pte_page); > + > + pagetable_pte_dtor(ptdesc); > + pagetable_free(ptdesc); > } > > > @@ -110,7 +116,7 @@ static inline void pte_free(struct mm_struct *mm, struct > page *pte_page) > * pmd_alloc_one - allocate a page for PMD-level page table > * @mm: the mm_struct of the current context > * > - * Allocates a page and runs the pgtable_pmd_page_ctor(). > + * Allocates a ptdesc and runs the pagetable_pmd_ctor(). Allocate memory for page table and ptdesc > * Allocations use %GFP_PGTABLE_USER in user context and > * %GFP_PGTABLE_KERNEL in kernel context. 
> * > @@ -118,28 +124,30 @@ static inline void pte_free(struct mm_struct *mm, > struct page *pte_page) > */ > static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) > { > - struct page *page; > + struct ptdesc *ptdesc; > gfp_t gfp = GFP_PGTABLE_USER; > > if (mm == &init_mm) > gfp = GFP_PGTABLE_KERNEL; > - page = alloc_page(gfp); > - if (!page) > + ptdesc = pagetable_alloc(gfp, 0); > + if (!ptdesc) > return NULL; > - if (!pgtable_pmd_page_ctor(page)) { > - __free_page(page); > + if (!pagetable_pmd_ctor(ptdesc)) { > + pagetable_free(ptdesc); > return NULL; > } > - return (pmd_t *)page_address(page); > + return ptdesc_address(ptdesc); > } > #endif > > #ifndef __HAVE_ARCH_PMD_FREE > static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) > { > + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); >
Re: [PATCH v4 20/34] arm: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:09PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > late_alloc() also uses the __get_free_pages() helper function. Convert > this to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) One comment below. > --- > arch/arm/include/asm/tlb.h | 12 +++- > arch/arm/mm/mmu.c | 6 +++--- > 2 files changed, 10 insertions(+), 8 deletions(-) > > diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h > index b8cbe03ad260..f40d06ad5d2a 100644 > --- a/arch/arm/include/asm/tlb.h > +++ b/arch/arm/include/asm/tlb.h > @@ -39,7 +39,9 @@ static inline void __tlb_remove_table(void *_table) > static inline void > __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) > { > - pgtable_pte_page_dtor(pte); > + struct ptdesc *ptdesc = page_ptdesc(pte); > + > + pagetable_pte_dtor(ptdesc); > > #ifndef CONFIG_ARM_LPAE > /* > @@ -50,17 +52,17 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, > unsigned long addr) > __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE); > #endif > > - tlb_remove_table(tlb, pte); > + tlb_remove_ptdesc(tlb, ptdesc); > } > > static inline void > __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) > { > #ifdef CONFIG_ARM_LPAE > - struct page *page = virt_to_page(pmdp); > + struct ptdesc *ptdesc = virt_to_ptdesc(pmdp); > > - pgtable_pmd_page_dtor(page); > - tlb_remove_table(tlb, page); > + pagetable_pmd_dtor(ptdesc); > + tlb_remove_ptdesc(tlb, ptdesc); > #endif > } > > diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c > index 22292cf3381c..294518fd0240 100644 > --- a/arch/arm/mm/mmu.c > +++ b/arch/arm/mm/mmu.c > @@ -737,11 +737,11 @@ static void __init *early_alloc(unsigned long sz) > > static void *__init 
late_alloc(unsigned long sz) > { > - void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz)); > + void *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL, get_order(sz)); > > - if (!ptr || !pgtable_pte_page_ctor(virt_to_page(ptr))) > + if (!ptdesc || !pagetable_pte_ctor(ptdesc)) > BUG(); > - return ptr; > + return ptdesc; should be return ptdesc_to_virt(ptdesc); > } > > static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr, > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [XEN PATCH] xen: fixed violations of MISRA C:2012 Rule 3.1
On 14.06.2023 16:28, Andrew Cooper wrote: > On 13/06/2023 8:42 am, Nicola Vetrini wrote: >> diff --git a/xen/common/xmalloc_tlsf.c b/xen/common/xmalloc_tlsf.c >> index 75bdf18c4e..ea6ec47a59 100644 >> --- a/xen/common/xmalloc_tlsf.c >> +++ b/xen/common/xmalloc_tlsf.c >> @@ -140,9 +140,10 @@ static inline void MAPPING_SEARCH(unsigned long *r, int >> *fl, int *sl) >> *fl = flsl(*r) - 1; >> *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; >> *fl -= FLI_OFFSET; >> -/*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! >> - *fl = *sl = 0; >> - */ >> +#if 0 >> +if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! >> +fl = *sl = 0; >> +#endif >> *r &= ~t; >> } >> } > > This logic has been commented out right from its introduction in c/s > 9736b76d829b2d in 2008, and never touched since. > > I think it can safely be deleted, and not placed inside an #if 0. I have to admit that I wouldn't be happy with deleting without any replacement. Instead of the commented out code, how about instead having ASSERT(*fl >= 0)? (What isn't clear to me is whether the commented out code is actually meant to replace the earlier line, rather than (optionally) be there in addition - at least it very much looks like so. With such an uncertainty I'd be further inclined to not remove what's there.) Jan
Re: [PATCH v4 21/34] arm64: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:10PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/arm64/include/asm/tlb.h | 14 -- > arch/arm64/mm/mmu.c | 7 --- > 2 files changed, 12 insertions(+), 9 deletions(-) > > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h > index c995d1f4594f..2c29239d05c3 100644 > --- a/arch/arm64/include/asm/tlb.h > +++ b/arch/arm64/include/asm/tlb.h > @@ -75,18 +75,20 @@ static inline void tlb_flush(struct mmu_gather *tlb) > static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, > unsigned long addr) > { > - pgtable_pte_page_dtor(pte); > - tlb_remove_table(tlb, pte); > + struct ptdesc *ptdesc = page_ptdesc(pte); > + > + pagetable_pte_dtor(ptdesc); > + tlb_remove_ptdesc(tlb, ptdesc); > } > > #if CONFIG_PGTABLE_LEVELS > 2 > static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, > unsigned long addr) > { > - struct page *page = virt_to_page(pmdp); > + struct ptdesc *ptdesc = virt_to_ptdesc(pmdp); > > - pgtable_pmd_page_dtor(page); > - tlb_remove_table(tlb, page); > + pagetable_pmd_dtor(ptdesc); > + tlb_remove_ptdesc(tlb, ptdesc); > } > #endif > > @@ -94,7 +96,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, > pmd_t *pmdp, > static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, > unsigned long addr) > { > - tlb_remove_table(tlb, virt_to_page(pudp)); > + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pudp)); > } > #endif > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index af6bc8403ee4..5867a0e917b9 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -426,6 +426,7 @@ static phys_addr_t __pgd_pgtable_alloc(int shift) > static phys_addr_t pgd_pgtable_alloc(int shift) > { > phys_addr_t pa = __pgd_pgtable_alloc(shift); > + struct 
ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); > > /* >* Call proper page table ctor in case later we need to > @@ -433,12 +434,12 @@ static phys_addr_t pgd_pgtable_alloc(int shift) >* this pre-allocated page table. >* >* We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is > - * folded, and if so pgtable_pmd_page_ctor() becomes nop. > + * folded, and if so pagetable_pte_ctor() becomes nop. >*/ > if (shift == PAGE_SHIFT) > - BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa))); > + BUG_ON(!pagetable_pte_ctor(ptdesc)); > else if (shift == PMD_SHIFT) > - BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa))); > + BUG_ON(!pagetable_pmd_ctor(ptdesc)); > > return pa; > } > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 22/34] csky: Convert __pte_free_tlb() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:11PM -0700, Vishal Moola (Oracle) wrote: > Part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents. > > Signed-off-by: Vishal Moola (Oracle) > Acked-by: Guo Ren Acked-by: Mike Rapoport (IBM) > --- > arch/csky/include/asm/pgalloc.h | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h > index 7d57e5da0914..9c84c9012e53 100644 > --- a/arch/csky/include/asm/pgalloc.h > +++ b/arch/csky/include/asm/pgalloc.h > @@ -63,8 +63,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) > > #define __pte_free_tlb(tlb, pte, address)\ > do { \ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page(tlb, pte); \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ > } while (0) > > extern void pagetable_init(void); > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 23/34] hexagon: Convert __pte_free_tlb() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:12PM -0700, Vishal Moola (Oracle) wrote: > Part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/hexagon/include/asm/pgalloc.h | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/arch/hexagon/include/asm/pgalloc.h > b/arch/hexagon/include/asm/pgalloc.h > index f0c47e6a7427..55988625e6fb 100644 > --- a/arch/hexagon/include/asm/pgalloc.h > +++ b/arch/hexagon/include/asm/pgalloc.h > @@ -87,10 +87,10 @@ static inline void pmd_populate_kernel(struct mm_struct > *mm, pmd_t *pmd, > max_kernel_seg = pmdindex; > } > > -#define __pte_free_tlb(tlb, pte, addr) \ > -do { \ > - pgtable_pte_page_dtor((pte)); \ > - tlb_remove_page((tlb), (pte)); \ > +#define __pte_free_tlb(tlb, pte, addr) \ > +do { \ > + pagetable_pte_dtor((page_ptdesc(pte))); \ > + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ > } while (0) > > #endif > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH] libxg: shrink variable scope in xc_core_arch_map_p2m_list_rw()
On Wed, Jun 14, 2023 at 09:02:56AM +0200, Jan Beulich wrote: > This in particular allows to drop a dead assignment to "ptes" from near > the end of the function. > > Coverity ID: 1532314 > Fixes: bd7a29c3d0b9 ("tools/libs/ctrl: fix xc_core_arch_map_p2m() to support > linear p2m table") > Signed-off-by: Jan Beulich > --- > v2: Much bigger change to limit the scope of "ptes" and other variables. The change of scope of all variables isn't too hard to review with --word-diff option and they all look fine. > --- a/tools/libs/guest/xg_core_x86.c > +++ b/tools/libs/guest/xg_core_x86.c > @@ -169,18 +169,21 @@ xc_core_arch_map_p2m_list_rw(xc_interfac > if ( !mfns ) > { > ERROR("Cannot allocate memory for array of %u mfns", idx); > +out_unmap: > +munmap(ptes, n_pages * PAGE_SIZE); > goto out; > } > I guess it's not that great to have the label out_unmap in the middle of the for loop (at least it's near the beginning), but at least that means that the mapping needs to be gone once out of the loop. So if someone edits the for loop and introduces a `goto out` instead of `goto out_unmap` there's just a potential leak rather than a potential use-after-free or double-free, so I guess that's better. So: Acked-by: Anthony PERARD Cheers, -- Anthony PERARD
Re: [PATCH v4 24/34] loongarch: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:13PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/loongarch/include/asm/pgalloc.h | 27 +++ > arch/loongarch/mm/pgtable.c | 7 --- > 2 files changed, 19 insertions(+), 15 deletions(-) > > diff --git a/arch/loongarch/include/asm/pgalloc.h > b/arch/loongarch/include/asm/pgalloc.h > index af1d1e4a6965..70bb3bdd201e 100644 > --- a/arch/loongarch/include/asm/pgalloc.h > +++ b/arch/loongarch/include/asm/pgalloc.h > @@ -45,9 +45,9 @@ extern void pagetable_init(void); > extern pgd_t *pgd_alloc(struct mm_struct *mm); > > #define __pte_free_tlb(tlb, pte, address)\ > -do { \ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb), pte);\ > +do { \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ > } while (0) > > #ifndef __PAGETABLE_PMD_FOLDED > @@ -55,18 +55,18 @@ do { > \ > static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long > address) > { > pmd_t *pmd; > - struct page *pg; > + struct ptdesc *ptdesc; > > - pg = alloc_page(GFP_KERNEL_ACCOUNT); > - if (!pg) > + ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, 0); > + if (!ptdesc) > return NULL; > > - if (!pgtable_pmd_page_ctor(pg)) { > - __free_page(pg); > + if (!pagetable_pmd_ctor(ptdesc)) { > + pagetable_free(ptdesc); > return NULL; > } > > - pmd = (pmd_t *)page_address(pg); > + pmd = ptdesc_address(ptdesc); > pmd_init(pmd); > return pmd; > } > @@ -80,10 +80,13 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, > unsigned long address) > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long > address) > { > 
pud_t *pud; > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); > > - pud = (pud_t *) __get_free_page(GFP_KERNEL); > - if (pud) > - pud_init(pud); > + if (!ptdesc) > + return NULL; > + pud = ptdesc_address(ptdesc); > + > + pud_init(pud); > return pud; > } > > diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c > index 36a6dc0148ae..cdba10ffc0df 100644 > --- a/arch/loongarch/mm/pgtable.c > +++ b/arch/loongarch/mm/pgtable.c > @@ -11,10 +11,11 @@ > > pgd_t *pgd_alloc(struct mm_struct *mm) > { > - pgd_t *ret, *init; > + pgd_t *init, *ret = NULL; > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); > > - ret = (pgd_t *) __get_free_page(GFP_KERNEL); > - if (ret) { > + if (ptdesc) { > + ret = (pgd_t *)ptdesc_address(ptdesc); > init = pgd_offset(&init_mm, 0UL); > pgd_init(ret); > memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 25/34] m68k: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:14PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) One comment below > --- > arch/m68k/include/asm/mcf_pgalloc.h | 41 ++-- > arch/m68k/include/asm/sun3_pgalloc.h | 8 +++--- > arch/m68k/mm/motorola.c | 4 +-- > 3 files changed, 27 insertions(+), 26 deletions(-) > > diff --git a/arch/m68k/include/asm/mcf_pgalloc.h > b/arch/m68k/include/asm/mcf_pgalloc.h > index 5c2c0a864524..857949ac9431 100644 > --- a/arch/m68k/include/asm/mcf_pgalloc.h > +++ b/arch/m68k/include/asm/mcf_pgalloc.h > @@ -7,20 +7,19 @@ > > extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) > { > - free_page((unsigned long) pte); > + pagetable_free(virt_to_ptdesc(pte)); > } > > extern const char bad_pmd_string[]; > > extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) > { > - unsigned long page = __get_free_page(GFP_DMA); > + struct ptdesc *ptdesc = pagetable_alloc(GFP_DMA | __GFP_ZERO, 0); > > - if (!page) > + if (!ptdesc) > return NULL; > > - memset((void *)page, 0, PAGE_SIZE); > - return (pte_t *) (page); > + return ptdesc_address(ptdesc); > } > > extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) > @@ -35,36 +34,36 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, > unsigned long address) > static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pgtable, > unsigned long address) > { > - struct page *page = virt_to_page(pgtable); > + struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); > > - pgtable_pte_page_dtor(page); > - __free_page(page); > + pagetable_pte_dtor(ptdesc); > + pagetable_free(ptdesc); > } > > static inline 
pgtable_t pte_alloc_one(struct mm_struct *mm) > { > - struct page *page = alloc_pages(GFP_DMA, 0); > + struct ptdesc *ptdesc = pagetable_alloc(GFP_DMA, 0); You can add __GFP_ZERO here and drop pagetable_clear() below > pte_t *pte; > > - if (!page) > + if (!ptdesc) > return NULL; > - if (!pgtable_pte_page_ctor(page)) { > - __free_page(page); > + if (!pagetable_pte_ctor(ptdesc)) { > + pagetable_free(ptdesc); > return NULL; > } > > - pte = page_address(page); > - clear_page(pte); > + pte = ptdesc_address(ptdesc); > + pagetable_clear(pte); > > return pte; > } > > static inline void pte_free(struct mm_struct *mm, pgtable_t pgtable) > { > - struct page *page = virt_to_page(pgtable); > + struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); > > - pgtable_pte_page_dtor(page); > - __free_page(page); > + pagetable_pte_dtor(ptdesc); > + pagetable_free(ptdesc); > } > > /* > @@ -75,16 +74,18 @@ static inline void pte_free(struct mm_struct *mm, > pgtable_t pgtable) > > static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) > { > - free_page((unsigned long) pgd); > + pagetable_free(virt_to_ptdesc(pgd)); > } > > static inline pgd_t *pgd_alloc(struct mm_struct *mm) > { > pgd_t *new_pgd; > + struct ptdesc *ptdesc = pagetable_alloc(GFP_DMA | GFP_NOWARN, 0); > > - new_pgd = (pgd_t *)__get_free_page(GFP_DMA | __GFP_NOWARN); > - if (!new_pgd) > + if (!ptdesc) > return NULL; > + new_pgd = ptdesc_address(ptdesc); > + > memcpy(new_pgd, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t)); > memset(new_pgd, 0, PAGE_OFFSET >> PGDIR_SHIFT); > return new_pgd; > diff --git a/arch/m68k/include/asm/sun3_pgalloc.h > b/arch/m68k/include/asm/sun3_pgalloc.h > index 198036aff519..ff48573db2c0 100644 > --- a/arch/m68k/include/asm/sun3_pgalloc.h > +++ b/arch/m68k/include/asm/sun3_pgalloc.h > @@ -17,10 +17,10 @@ > > extern const char bad_pmd_string[]; > > -#define __pte_free_tlb(tlb,pte,addr) \ > -do { \ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb), pte);\ > +#define __pte_free_tlb(tlb, pte, 
addr) \ > +do { \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ > } while (0) > > static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, > pte_t *pte) > diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c > index c75984e2d86b..594575a0780c 100644 > --- a/arch/m68
Re: [PATCH v4 26/34] mips: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:15PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/mips/include/asm/pgalloc.h | 31 +-- > arch/mips/mm/pgtable.c | 7 --- > 2 files changed, 21 insertions(+), 17 deletions(-) > > diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h > index f72e737dda21..6940e5536664 100644 > --- a/arch/mips/include/asm/pgalloc.h > +++ b/arch/mips/include/asm/pgalloc.h > @@ -51,13 +51,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm); > > static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) > { > - free_pages((unsigned long)pgd, PGD_TABLE_ORDER); > + pagetable_free(virt_to_ptdesc(pgd)); > } > > -#define __pte_free_tlb(tlb,pte,address) \ > -do { \ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb), pte);\ > +#define __pte_free_tlb(tlb, pte, address)\ > +do { \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ > } while (0) > > #ifndef __PAGETABLE_PMD_FOLDED > @@ -65,18 +65,18 @@ do { > \ > static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long > address) > { > pmd_t *pmd; > - struct page *pg; > + struct ptdesc *ptdesc; > > - pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_TABLE_ORDER); > - if (!pg) > + ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, PMD_TABLE_ORDER); > + if (!ptdesc) > return NULL; > > - if (!pgtable_pmd_page_ctor(pg)) { > - __free_pages(pg, PMD_TABLE_ORDER); > + if (!pagetable_pmd_ctor(ptdesc)) { > + pagetable_free(ptdesc); > return NULL; > } > > - pmd = (pmd_t *)page_address(pg); > + pmd = ptdesc_address(ptdesc); > pmd_init(pmd); > return pmd; 
> } > @@ -90,10 +90,13 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, > unsigned long address) > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long > address) > { > pud_t *pud; > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, PUD_TABLE_ORDER); > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_TABLE_ORDER); > - if (pud) > - pud_init(pud); > + if (!ptdesc) > + return NULL; > + pud = ptdesc_address(ptdesc); > + > + pud_init(pud); > return pud; > } > > diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c > index b13314be5d0e..729258ff4e3b 100644 > --- a/arch/mips/mm/pgtable.c > +++ b/arch/mips/mm/pgtable.c > @@ -10,10 +10,11 @@ > > pgd_t *pgd_alloc(struct mm_struct *mm) > { > - pgd_t *ret, *init; > + pgd_t *init, *ret = NULL; > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, PGD_TABLE_ORDER); > > - ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER); > - if (ret) { > + if (ptdesc) { > + ret = ptdesc_address(ptdesc); > init = pgd_offset(&init_mm, 0UL); > pgd_init(ret); > memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 27/34] nios2: Convert __pte_free_tlb() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:16PM -0700, Vishal Moola (Oracle) wrote: > Part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/nios2/include/asm/pgalloc.h | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/arch/nios2/include/asm/pgalloc.h > b/arch/nios2/include/asm/pgalloc.h > index ecd1657bb2ce..ce6bb8e74271 100644 > --- a/arch/nios2/include/asm/pgalloc.h > +++ b/arch/nios2/include/asm/pgalloc.h > @@ -28,10 +28,10 @@ static inline void pmd_populate(struct mm_struct *mm, > pmd_t *pmd, > > extern pgd_t *pgd_alloc(struct mm_struct *mm); > > -#define __pte_free_tlb(tlb, pte, addr) \ > - do {\ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb), (pte)); \ > +#define __pte_free_tlb(tlb, pte, addr) > \ > + do {\ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ > } while (0) > > #endif /* _ASM_NIOS2_PGALLOC_H */ > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 28/34] openrisc: Convert __pte_free_tlb() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:17PM -0700, Vishal Moola (Oracle) wrote: > Part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/openrisc/include/asm/pgalloc.h | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/arch/openrisc/include/asm/pgalloc.h > b/arch/openrisc/include/asm/pgalloc.h > index b7b2b8d16fad..c6a73772a546 100644 > --- a/arch/openrisc/include/asm/pgalloc.h > +++ b/arch/openrisc/include/asm/pgalloc.h > @@ -66,10 +66,10 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm) > > extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); > > -#define __pte_free_tlb(tlb, pte, addr) \ > -do { \ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb), (pte)); \ > +#define __pte_free_tlb(tlb, pte, addr) \ > +do { \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ > } while (0) > > #endif > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 29/34] riscv: Convert alloc_{pmd, pte}_late() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:18PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Some of the functions use the *get*page*() helper functions. Convert > these to use pagetable_alloc() and ptdesc_address() instead to help > standardize page tables further. > > Signed-off-by: Vishal Moola (Oracle) > Acked-by: Palmer Dabbelt Acked-by: Mike Rapoport (IBM) > --- > arch/riscv/include/asm/pgalloc.h | 8 > arch/riscv/mm/init.c | 16 ++-- > 2 files changed, 10 insertions(+), 14 deletions(-) > > diff --git a/arch/riscv/include/asm/pgalloc.h > b/arch/riscv/include/asm/pgalloc.h > index 59dc12b5b7e8..d169a4f41a2e 100644 > --- a/arch/riscv/include/asm/pgalloc.h > +++ b/arch/riscv/include/asm/pgalloc.h > @@ -153,10 +153,10 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) > > #endif /* __PAGETABLE_PMD_FOLDED */ > > -#define __pte_free_tlb(tlb, pte, buf) \ > -do {\ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb), pte);\ > +#define __pte_free_tlb(tlb, pte, buf)\ > +do { \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ > } while (0) > #endif /* CONFIG_MMU */ > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index 3d689ffb2072..6bfeec80bf4e 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -354,12 +354,10 @@ static inline phys_addr_t __init > alloc_pte_fixmap(uintptr_t va) > > static phys_addr_t __init alloc_pte_late(uintptr_t va) > { > - unsigned long vaddr; > - > - vaddr = __get_free_page(GFP_KERNEL); > - BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page((void *)vaddr))); > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); > > - return __pa(vaddr); > + BUG_ON(!ptdesc || !pagetable_pte_ctor(ptdesc)); > + return __pa((pte_t *)ptdesc_address(ptdesc)); > } > > static void __init create_pte_mapping(pte_t *ptep, > @@ -437,12 +435,10 @@ 
static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) > > static phys_addr_t __init alloc_pmd_late(uintptr_t va) > { > - unsigned long vaddr; > - > - vaddr = __get_free_page(GFP_KERNEL); > - BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page((void *)vaddr))); > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); > > - return __pa(vaddr); > + BUG_ON(!ptdesc || !pagetable_pmd_ctor(ptdesc)); > + return __pa((pmd_t *)ptdesc_address(ptdesc)); > } > > static void __init create_pmd_mapping(pmd_t *pmdp, > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 30/34] sh: Convert pte_free_tlb() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:19PM -0700, Vishal Moola (Oracle) wrote: > Part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents. Also cleans up some spacing issues. > > Signed-off-by: Vishal Moola (Oracle) > Reviewed-by: Geert Uytterhoeven > Acked-by: John Paul Adrian Glaubitz Acked-by: Mike Rapoport (IBM) > --- > arch/sh/include/asm/pgalloc.h | 9 + > 1 file changed, 5 insertions(+), 4 deletions(-) > > diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h > index a9e98233c4d4..5d8577ab1591 100644 > --- a/arch/sh/include/asm/pgalloc.h > +++ b/arch/sh/include/asm/pgalloc.h > @@ -2,6 +2,7 @@ > #ifndef __ASM_SH_PGALLOC_H > #define __ASM_SH_PGALLOC_H > > +#include > #include > > #define __HAVE_ARCH_PMD_ALLOC_ONE > @@ -31,10 +32,10 @@ static inline void pmd_populate(struct mm_struct *mm, > pmd_t *pmd, > set_pmd(pmd, __pmd((unsigned long)page_address(pte))); > } > > -#define __pte_free_tlb(tlb,pte,addr) \ > -do { \ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb), (pte)); \ > +#define __pte_free_tlb(tlb, pte, addr) \ > +do { \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ > } while (0) > > #endif /* __ASM_SH_PGALLOC_H */ > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 31/34] sparc64: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:20PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/sparc/mm/init_64.c | 17 + > 1 file changed, 9 insertions(+), 8 deletions(-) > > diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c > index 04f9db0c3111..105915cd2eee 100644 > --- a/arch/sparc/mm/init_64.c > +++ b/arch/sparc/mm/init_64.c > @@ -2893,14 +2893,15 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm) > > pgtable_t pte_alloc_one(struct mm_struct *mm) > { > - struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO); > - if (!page) > + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL | __GFP_ZERO, 0); > + > + if (!ptdesc) > return NULL; > - if (!pgtable_pte_page_ctor(page)) { > - __free_page(page); > + if (!pagetable_pte_ctor(ptdesc)) { > + pagetable_free(ptdesc); > return NULL; > } > - return (pte_t *) page_address(page); > + return ptdesc_address(ptdesc); > } > > void pte_free_kernel(struct mm_struct *mm, pte_t *pte) > @@ -2910,10 +2911,10 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte) > > static void __pte_free(pgtable_t pte) > { > - struct page *page = virt_to_page(pte); > + struct ptdesc *ptdesc = virt_to_ptdesc(pte); > > - pgtable_pte_page_dtor(page); > - __free_page(page); > + pagetable_pte_dtor(ptdesc); > + pagetable_free(ptdesc); > } > > void pte_free(struct mm_struct *mm, pgtable_t pte) > -- > 2.40.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv -- Sincerely yours, Mike.
Re: [PATCH v4 32/34] sparc: Convert pgtable_pte_page_{ctor, dtor}() to ptdesc equivalents
On Mon, Jun 12, 2023 at 02:04:21PM -0700, Vishal Moola (Oracle) wrote: > Part of the conversions to replace pgtable pte constructor/destructors with > ptdesc equivalents. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/sparc/mm/srmmu.c | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c > index 13f027afc875..8393faa3e596 100644 > --- a/arch/sparc/mm/srmmu.c > +++ b/arch/sparc/mm/srmmu.c > @@ -355,7 +355,8 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) > return NULL; > page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT); > spin_lock(&mm->page_table_lock); > - if (page_ref_inc_return(page) == 2 && !pgtable_pte_page_ctor(page)) { > + if (page_ref_inc_return(page) == 2 && > + !pagetable_pte_ctor(page_ptdesc(page))) { > page_ref_dec(page); > ptep = NULL; > } > @@ -371,7 +372,7 @@ void pte_free(struct mm_struct *mm, pgtable_t ptep) > page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT); > spin_lock(&mm->page_table_lock); > if (page_ref_dec_return(page) == 1) > - pgtable_pte_page_dtor(page); > + pagetable_pte_dtor(page_ptdesc(page)); > spin_unlock(&mm->page_table_lock); > > srmmu_free_nocache(ptep, SRMMU_PTE_TABLE_SIZE); > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 33/34] um: Convert {pmd, pte}_free_tlb() to use ptdescs
On Mon, Jun 12, 2023 at 02:04:22PM -0700, Vishal Moola (Oracle) wrote: > Part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents. Also cleans up some spacing issues. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) > --- > arch/um/include/asm/pgalloc.h | 18 +- > 1 file changed, 9 insertions(+), 9 deletions(-) > > diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h > index 8ec7cd46dd96..de5e31c64793 100644 > --- a/arch/um/include/asm/pgalloc.h > +++ b/arch/um/include/asm/pgalloc.h > @@ -25,19 +25,19 @@ > */ > extern pgd_t *pgd_alloc(struct mm_struct *); > > -#define __pte_free_tlb(tlb,pte, address) \ > -do { \ > - pgtable_pte_page_dtor(pte); \ > - tlb_remove_page((tlb),(pte)); \ > +#define __pte_free_tlb(tlb, pte, address)\ > +do { \ > + pagetable_pte_dtor(page_ptdesc(pte)); \ > + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ > } while (0) > > #ifdef CONFIG_3_LEVEL_PGTABLES > > -#define __pmd_free_tlb(tlb, pmd, address)\ > -do { \ > - pgtable_pmd_page_dtor(virt_to_page(pmd)); \ > - tlb_remove_page((tlb),virt_to_page(pmd)); \ > -} while (0) \ > +#define __pmd_free_tlb(tlb, pmd, address)\ > +do { \ > + pagetable_pmd_dtor(virt_to_ptdesc(pmd));\ > + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ > +} while (0) > > #endif > > -- > 2.40.1 > > -- Sincerely yours, Mike.
Re: [PATCH v4 34/34] mm: Remove pgtable_{pmd, pte}_page_{ctor, dtor}() wrappers
On Mon, Jun 12, 2023 at 02:04:23PM -0700, Vishal Moola (Oracle) wrote: > These functions are no longer necessary. Remove them and cleanup > Documentation referencing them. > > Signed-off-by: Vishal Moola (Oracle) I've found one stale reference in riscv: $ git grep -n pgtable_pmd_page_ctor arch/riscv/mm/init.c:440: BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr))); Otherwise Acked-by: Mike Rapoport (IBM) > --- > Documentation/mm/split_page_table_lock.rst| 12 +-- > .../zh_CN/mm/split_page_table_lock.rst| 14 ++--- > include/linux/mm.h| 20 --- > 3 files changed, 13 insertions(+), 33 deletions(-) > > diff --git a/Documentation/mm/split_page_table_lock.rst > b/Documentation/mm/split_page_table_lock.rst > index 50ee0dfc95be..4bffec728340 100644 > --- a/Documentation/mm/split_page_table_lock.rst > +++ b/Documentation/mm/split_page_table_lock.rst > @@ -53,7 +53,7 @@ Support of split page table lock by an architecture > === > > There's no need in special enabling of PTE split page table lock: everything > -required is done by pgtable_pte_page_ctor() and pgtable_pte_page_dtor(), > which > +required is done by pagetable_pte_ctor() and pagetable_pte_dtor(), which > must be called on PTE table allocation / freeing. > > Make sure the architecture doesn't use slab allocator for page table > @@ -63,8 +63,8 @@ This field shares storage with page->ptl. > PMD split lock only makes sense if you have more than two page table > levels. > > -PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table > -allocation and pgtable_pmd_page_dtor() on freeing. > +PMD split lock enabling requires pagetable_pmd_ctor() call on PMD table > +allocation and pagetable_pmd_dtor() on freeing. > > Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and > pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing > @@ -72,7 +72,7 @@ paths: i.e X86_PAE preallocate few PMDs on pgd_alloc(). 
> > With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. > > -NOTE: pgtable_pte_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must > +NOTE: pagetable_pte_ctor() and pagetable_pmd_ctor() can fail -- it must > be handled properly. > > page->ptl > @@ -92,7 +92,7 @@ trick: > split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs > one more cache line for indirect access; > > -The spinlock_t allocated in pgtable_pte_page_ctor() for PTE table and in > -pgtable_pmd_page_ctor() for PMD table. > +The spinlock_t allocated in pagetable_pte_ctor() for PTE table and in > +pagetable_pmd_ctor() for PMD table. > > Please, never access page->ptl directly -- use appropriate helper. > diff --git a/Documentation/translations/zh_CN/mm/split_page_table_lock.rst > b/Documentation/translations/zh_CN/mm/split_page_table_lock.rst > index 4fb7aa666037..a2c288670a24 100644 > --- a/Documentation/translations/zh_CN/mm/split_page_table_lock.rst > +++ b/Documentation/translations/zh_CN/mm/split_page_table_lock.rst > @@ -56,16 +56,16 @@ Hugetlb特定的辅助函数: > 架构对分页表锁的支持 > > > -没有必要特别启用PTE分页表锁:所有需要的东西都由pgtable_pte_page_ctor() > -和pgtable_pte_page_dtor()完成,它们必须在PTE表分配/释放时被调用。 > +没有必要特别启用PTE分页表锁:所有需要的东西都由pagetable_pte_ctor() > +和pagetable_pte_dtor()完成,它们必须在PTE表分配/释放时被调用。 > > 确保架构不使用slab分配器来分配页表:slab使用page->slab_cache来分配其页 > 面。这个区域与page->ptl共享存储。 > > PMD分页锁只有在你有两个以上的页表级别时才有意义。 > > -启用PMD分页锁需要在PMD表分配时调用pgtable_pmd_page_ctor(),在释放时调 > -用pgtable_pmd_page_dtor()。 > +启用PMD分页锁需要在PMD表分配时调用pagetable_pmd_ctor(),在释放时调 > +用pagetable_pmd_dtor()。 > > 分配通常发生在pmd_alloc_one()中,释放发生在pmd_free()和pmd_free_tlb() > 中,但要确保覆盖所有的PMD表分配/释放路径:即X86_PAE在pgd_alloc()中预先 > @@ -73,7 +73,7 @@ PMD分页锁只有在你有两个以上的页表级别时才有意义。 > > 一切就绪后,你可以设置CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK。 > > -注意:pgtable_pte_page_ctor()和pgtable_pmd_page_ctor()可能失败--必 > +注意:pagetable_pte_ctor()和pagetable_pmd_ctor()可能失败--必 > 须正确处理。 > > page->ptl > @@ -90,7 +90,7 @@ page->ptl用于访问分割页表锁,其中'page'是包含该表的页面struc > 
的指针并动态分配它。这允许在启用DEBUG_SPINLOCK或DEBUG_LOCK_ALLOC的 > 情况下使用分页锁,但由于间接访问而多花了一个缓存行。 > > -PTE表的spinlock_t分配在pgtable_pte_page_ctor()中,PMD表的spinlock_t > -分配在pgtable_pmd_page_ctor()中。 > +PTE表的spinlock_t分配在pagetable_pte_ctor()中,PMD表的spinlock_t > +分配在pagetable_pmd_ctor()中。 > > 请不要直接访问page->ptl - -使用适当的辅助函数。 > diff --git a/include/linux/mm.h b/include/linux/mm.h > index dc211c43610b..6d83483cf186 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2897,11 +2897,6 @@ static inline bool pagetable_pte_ctor(struct ptdesc > *ptdesc) > return true; > } > > -static inline bool pgtable_pte_page_ctor(struct page *page) > -{ > - return pagetable_pte_ctor(page_ptdesc(page)); > -} > - > static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) > { > struct folio *folio = ptdesc_folio(ptdesc); > @@ -2911,11 +2906,6 @@
Re: [PATCH v3 2/4] xen: Add files needed for minimal ppc64le build
On 13.06.2023 16:50, Shawn Anastasio wrote: > --- /dev/null > +++ b/xen/arch/ppc/Makefile > @@ -0,0 +1,16 @@ > +obj-$(CONFIG_PPC64) += ppc64/ > + > +$(TARGET): $(TARGET)-syms > + cp -f $< $@ > + > +$(TARGET)-syms: $(objtree)/prelink.o $(obj)/xen.lds > + $(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds -N $< $(build_id_linker) -o $@ > + $(NM) -pa --format=sysv $(@D)/$(@F) \ > + | $(objtree)/tools/symbols --all-symbols --xensyms --sysv > --sort \ > + >$(@D)/$(@F).map Elsewhere we recently switched these uses of $(@D)/$(@F) to just $@. Please can you do so here as well? > --- /dev/null > +++ b/xen/arch/ppc/arch.mk > @@ -0,0 +1,11 @@ > + > +# Power-specific definitions > + > +ppc-march-$(CONFIG_POWER_ISA_2_07B) := power8 > +ppc-march-$(CONFIG_POWER_ISA_3_00) := power9 > + > +CFLAGS += -mcpu=$(ppc-march-y) -mstrict-align -mcmodel=large -mabi=elfv2 > -mno-altivec -mno-vsx Wouldn't it make sense to also pass -mlittle here, such that a tool chain defaulting to big-endian can still be used? > --- /dev/null > +++ b/xen/arch/ppc/ppc64/head.S > @@ -0,0 +1,27 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > + > +.section .text.header, "ax", %progbits > + > +ENTRY(start) > +/* > + * Depending on how we were booted, the CPU could be running in either > + * Little Endian or Big Endian mode. The following trampoline from Linux > + * cleverly uses an instruction that encodes to a NOP if the CPU's > + * endianness matches the assumption of the assembler (LE, in our case) > + * or a branch to code that performs the endian switch in the other case. > + */ > +tdi 0, 0, 0x48/* Reverse endian of b . + 8 */ > +b $ + 44 /* Skip trampoline if endian is good */ If I get this right, $ and . are interchangable on Power? If not, then all is fine and there likely is a reason to use . in the comment but $ in the code. But if so, it would be nice if both could match, and I guess with other architectures in mind . would be preferable. 
> --- /dev/null > +++ b/xen/arch/ppc/xen.lds.S > @@ -0,0 +1,173 @@ > +#include > + > +#undef ENTRY > +#undef ALIGN > + > +OUTPUT_ARCH(powerpc:common64) > +ENTRY(start) > + > +PHDRS > +{ > +text PT_LOAD ; > +#if defined(BUILD_ID) > +note PT_NOTE ; > +#endif > +} > + > +/** > + * OF's base load address is 0x40 (XEN_VIRT_START). > + * By defining sections this way, we can keep our virtual address base at > 0x40 > + * while keeping the physical base at 0x0. > + * > + * Without this, OF incorrectly loads .text at 0x40 + 0x40 = > 0x80. > + * Taken from x86/xen.lds.S > + */ > +#ifdef CONFIG_LD_IS_GNU > +# define DECL_SECTION(x) x : AT(ADDR(#x) - XEN_VIRT_START) > +#else > +# define DECL_SECTION(x) x : AT(ADDR(x) - XEN_VIRT_START) > +#endif > + > +SECTIONS > +{ > +. = XEN_VIRT_START; > + > +DECL_SECTION(.text) { > +_stext = .;/* Text section */ > +*(.text.header) > + > +*(.text.cold) > +*(.text.unlikely .text.*_unlikely .text.unlikely.*) > + > +*(.text) > +#ifdef CONFIG_CC_SPLIT_SECTIONS > +*(.text.*) > +#endif > + > +*(.fixup) > +*(.gnu.warning) > +. = ALIGN(POINTER_ALIGN); > +_etext = .; /* End of text section */ > +} :text > + > +. = ALIGN(PAGE_SIZE); > +DECL_SECTION(.rodata) { > +_srodata = .; /* Read-only data */ > +*(.rodata) > +*(.rodata.*) > +*(.data.rel.ro) > +*(.data.rel.ro.*) > + > +VPCI_ARRAY > + > +. = ALIGN(POINTER_ALIGN); > +_erodata = .;/* End of read-only data */ > +} :text > + > +#if defined(BUILD_ID) > +. = ALIGN(4); > +DECL_SECTION(.note.gnu.build-id) { > +__note_gnu_build_id_start = .; > +*(.note.gnu.build-id) > +__note_gnu_build_id_end = .; > +} :note :text > +#endif > +_erodata = .;/* End of read-only data */ > + > +. = ALIGN(PAGE_SIZE); > +DECL_SECTION(.data.ro_after_init) { > +__ro_after_init_start = .; > +*(.data.ro_after_init) > +. = ALIGN(PAGE_SIZE); > +__ro_after_init_end = .; > +} : text > + > +DECL_SECTION(.data.read_mostly) { > +*(.data.read_mostly) > +} :text > + > +. 
= ALIGN(PAGE_SIZE); > +DECL_SECTION(.data) {/* Data */ > +*(.data.page_aligned) > +. = ALIGN(8); > +__start_schedulers_array = .; > +*(.data.schedulers) > +__end_schedulers_array = .; > + > +HYPFS_PARAM > + > +*(.data .data.*) > +CONSTRUCTORS > +} :text > + > +. = ALIGN(PAGE_SIZE); /* Init code and data */ > +__init_begin = .; > +DECL_SECTION(.init.text) { > +_sinittext = .; > +*(.init.text) > +_einittext = .; > +. = ALIGN(PAGE_SIZE);/* Avoid mapping alt insns executable
[PATCH] Arm: drop bogus ALIGN() from linker script
Having ALIGN() inside a section definition usually makes sense only with a label definition following (an exception case is a few lines out of context, where cache line sharing is intended to be avoided). Constituents of .bss.page_aligned need to specify their own alignment correctly anyway, or else they're susceptible to link order changing. This requirement is already met: Arm-specific code has no such object, while common (EFI) code has another one. That one has suitable alignment specified. Signed-off-by: Jan Beulich --- Note how RISC-V had this dropped pretty recently. --- a/xen/arch/arm/xen.lds.S +++ b/xen/arch/arm/xen.lds.S @@ -199,7 +199,6 @@ SECTIONS .bss : { /* BSS */ __bss_start = .; *(.bss.stack_aligned) - . = ALIGN(PAGE_SIZE); *(.bss.page_aligned) . = ALIGN(PAGE_SIZE); __per_cpu_start = .;
Re: Functions _spin_lock_cb() and handle_ro_raz()
On 14/06/23 16:03, Jan Beulich wrote: On 14.06.2023 15:08, Federico Serafini wrote: Hello everyone, I am working on the violations of MISRA C:2012 Rule 8.10, whose headline says: "An inline function shall be declared with the static storage class". For both ARM64 and X86_64 builds, function _spin_lock_cb() defined in spinlock.c violates the rule. Such function is declared in spinlock.h without the inline function specifier: are there any reasons to do this? Since this function was mentioned elsewhere already, I'm afraid I have to be a little blunt and ask back: Did you check the history of the function. Yes, it is intentional to be that way, for the function to be inlined into _spin_lock(), and for it to also be available for external callers (we have just one right now, but that could change). What about solving the violation by moving the function definition in spinlock.h and declaring it as static inline? Did you try whether that would work at least purely mechanically? I'm afraid you'll find that it doesn't, because of LOCK_PROFILE_* being unavailable then. Yet we also don't want to expose all that in the header. In the earlier context I did suggest already to make the function an always-inline one in spinlock.c, under a slightly altered name, and then have _spin_lock_cb() be a trivial wrapper just like _spin_lock() is. I guess best is going to be if I make and post a patch ... Jan Thank you for the information. Federico
[PATCH v2] xen/grant: Purge PIN_FAIL()
The name PIN_FAIL() is poor; it's not used only pinning failures. More importantly, it interferes with code legibility by hiding control flow. Expand and drop it. * Drop redundant "rc = rc" assignment * Rework gnttab_copy_buf() to be simpler by dropping the rc variable As a side effect, this fixes several violations of MISRA rule 2.1 (dead code - the while() following a goto). No functional change. Signed-off-by: Andrew Cooper Reviewed-by: Julien Grall Reviewed-by: Jan Beulich --- CC: Jan Beulich CC: Roger Pau Monné CC: Wei Liu CC: Stefano Stabellini CC: Julien Grall CC: Volodymyr Babchuk CC: Bertrand Marquis v2: * Fix indentation. * Reword the commit message a little. --- xen/common/grant_table.c | 154 --- 1 file changed, 111 insertions(+), 43 deletions(-) diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index d87e58a53d86..89b7811c51c3 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -270,13 +270,6 @@ struct gnttab_unmap_common { #define GNTTAB_UNMAP_BATCH_SIZE 32 -#define PIN_FAIL(_lbl, _rc, _f, _a...) \ -do {\ -gdprintk(XENLOG_WARNING, _f, ## _a ); \ -rc = (_rc); \ -goto _lbl; \ -} while ( 0 ) - /* * Tracks a mapping of another domain's grant reference. Each domain has a * table of these, indexes into which are returned as a 'mapping handle'. @@ -785,9 +778,13 @@ static int _set_status_v1(const grant_entry_header_t *shah, /* If not already pinned, check the grant domid and type. 
*/ if ( !act->pin && (((scombo.flags & mask) != GTF_permit_access) || (scombo.domid != ldomid)) ) -PIN_FAIL(done, GNTST_general_error, +{ +gdprintk(XENLOG_WARNING, "Bad flags (%x) or dom (%d); expected d%d\n", scombo.flags, scombo.domid, ldomid); +rc = GNTST_general_error; +goto done; +} new = scombo; new.flags |= GTF_reading; @@ -796,8 +793,12 @@ static int _set_status_v1(const grant_entry_header_t *shah, { new.flags |= GTF_writing; if ( unlikely(scombo.flags & GTF_readonly) ) -PIN_FAIL(done, GNTST_general_error, +{ +gdprintk(XENLOG_WARNING, "Attempt to write-pin a r/o grant entry\n"); +rc = GNTST_general_error; +goto done; +} } prev.raw = guest_cmpxchg(rd, raw_shah, scombo.raw, new.raw); @@ -805,8 +806,11 @@ static int _set_status_v1(const grant_entry_header_t *shah, break; if ( retries++ == 4 ) -PIN_FAIL(done, GNTST_general_error, - "Shared grant entry is unstable\n"); +{ +gdprintk(XENLOG_WARNING, "Shared grant entry is unstable\n"); +rc = GNTST_general_error; +goto done; +} scombo = prev; } @@ -840,9 +844,13 @@ static int _set_status_v2(const grant_entry_header_t *shah, scombo.flags & mask) != GTF_permit_access) && (mapflag || ((scombo.flags & mask) != GTF_transitive))) || (scombo.domid != ldomid)) ) -PIN_FAIL(done, GNTST_general_error, +{ +gdprintk(XENLOG_WARNING, "Bad flags (%x) or dom (%d); expected d%d, flags %x\n", scombo.flags, scombo.domid, ldomid, mask); +rc = GNTST_general_error; +goto done; +} if ( readonly ) { @@ -851,8 +859,12 @@ static int _set_status_v2(const grant_entry_header_t *shah, else { if ( unlikely(scombo.flags & GTF_readonly) ) -PIN_FAIL(done, GNTST_general_error, +{ +gdprintk(XENLOG_WARNING, "Attempt to write-pin a r/o grant entry\n"); +rc = GNTST_general_error; +goto done; +} *status |= GTF_reading | GTF_writing; } @@ -870,9 +882,11 @@ static int _set_status_v2(const grant_entry_header_t *shah, (!readonly && (scombo.flags & GTF_readonly)) ) { gnttab_clear_flags(rd, GTF_writing | GTF_reading, status); -PIN_FAIL(done, 
GNTST_general_error, +gdprintk(XENLOG_WARNING, "Unstable flags (%x) or dom (%d); expected d%d (r/w: %d)\n", scombo.flags, scombo.domid, ldomid, !readonly); +rc = GNTST_general_error; +goto done; } } else @@ -880,8 +894,9 @@ static int _set_status_v2(const grant_entry_header_t *shah, if ( unlikely(scombo.flags & GTF_readonly) ) { gnttab_clear_flags(rd, GTF_writing, status); -PIN_FAIL(done, GNTST_general_error, - "Unstable grant readonly f
Re: [PATCH v3 2/4] xen: Add files needed for minimal ppc64le build
On Wed Jun 14, 2023 at 10:51 AM CDT, Jan Beulich wrote: > On 13.06.2023 16:50, Shawn Anastasio wrote: > > --- /dev/null > > +++ b/xen/arch/ppc/Makefile > > @@ -0,0 +1,16 @@ > > +obj-$(CONFIG_PPC64) += ppc64/ > > + > > +$(TARGET): $(TARGET)-syms > > + cp -f $< $@ > > + > > +$(TARGET)-syms: $(objtree)/prelink.o $(obj)/xen.lds > > + $(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds -N $< $(build_id_linker) -o $@ > > + $(NM) -pa --format=sysv $(@D)/$(@F) \ > > + | $(objtree)/tools/symbols --all-symbols --xensyms --sysv > > --sort \ > > + >$(@D)/$(@F).map > > Elsewhere we recently switched these uses of $(@D)/$(@F) to just $@. > Please can you do so here as well? Sure, will fix in v4. > > --- /dev/null > > +++ b/xen/arch/ppc/arch.mk > > @@ -0,0 +1,11 @@ > > + > > +# Power-specific definitions > > + > > +ppc-march-$(CONFIG_POWER_ISA_2_07B) := power8 > > +ppc-march-$(CONFIG_POWER_ISA_3_00) := power9 > > + > > +CFLAGS += -mcpu=$(ppc-march-y) -mstrict-align -mcmodel=large -mabi=elfv2 > > -mno-altivec -mno-vsx > > Wouldn't it make sense to also pass -mlittle here, such that a tool > chain defaulting to big-endian can still be used? Good call. On this topic, I suppose I'll also add -m64 to allow 32-bit toolchains to be used as well. > > --- /dev/null > > +++ b/xen/arch/ppc/ppc64/head.S > > @@ -0,0 +1,27 @@ > > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > > + > > +.section .text.header, "ax", %progbits > > + > > +ENTRY(start) > > +/* > > + * Depending on how we were booted, the CPU could be running in either > > + * Little Endian or Big Endian mode. The following trampoline from > > Linux > > + * cleverly uses an instruction that encodes to a NOP if the CPU's > > + * endianness matches the assumption of the assembler (LE, in our case) > > + * or a branch to code that performs the endian switch in the other > > case. > > + */ > > +tdi 0, 0, 0x48/* Reverse endian of b . + 8 */ > > +b $ + 44 /* Skip trampoline if endian is good */ > > If I get this right, $ and . 
are interchangable on Power? If not, > then all is fine and there likely is a reason to use . in the > comment but $ in the code. But if so, it would be nice if both > could match, and I guess with other architectures in mind . would > be preferable. As hinted by the comment, this code was directly inherited from Linux and I'm not sure why the original author chose '$' instead of '.'. That said, as far as I can tell you are correct about the two being interchangeable, and changing the $ to . results in the exact same machine code. I can go ahead and make the change for consistency in v4. > > +DECL_SECTION(.bss) { /* BSS */ > > +__bss_start = .; > > +*(.bss.stack_aligned) > > +. = ALIGN(PAGE_SIZE); > > +*(.bss.page_aligned) > > ... the one between the two .bss parts looks unmotivated. Within > a section definition ALIGN() typically only makes sense when followed > by a label definition, like ... Correct me if I'm wrong, but wouldn't the ALIGN here serve to ensure that the subsequent '.bss.page_aligned' section has the correct alignment that its name implies? > Jan Thanks, Shawn
[qemu-mainline test] 181428: regressions - FAIL
flight 181428 qemu-mainline real [real] http://logs.test-lab.xenproject.org/osstest/logs/181428/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-amd64-xsm 6 xen-buildfail REGR. vs. 180691 build-arm64 6 xen-buildfail REGR. vs. 180691 build-arm64-xsm 6 xen-buildfail REGR. vs. 180691 build-amd64 6 xen-buildfail REGR. vs. 180691 build-i3866 xen-buildfail REGR. vs. 180691 build-i386-xsm6 xen-buildfail REGR. vs. 180691 build-armhf 6 xen-buildfail REGR. vs. 180691 Tests which did not succeed, but are not blocking: test-amd64-i386-xl-qemuu-debianhvm-amd64-shadow 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-debianhvm-i386-xsm 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-dmrestrict-amd64-dmrestrict 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-win7-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-qemuu-ws16-amd64 1 build-check(1) blocked n/a test-amd64-i386-xl-shadow 1 build-check(1) blocked n/a test-amd64-i386-xl-vhd1 build-check(1) blocked n/a test-amd64-i386-xl-xsm1 build-check(1) blocked n/a test-arm64-arm64-libvirt-raw 1 build-check(1) blocked n/a test-arm64-arm64-libvirt-xsm 1 build-check(1) blocked n/a test-arm64-arm64-xl 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit1 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit2 1 build-check(1) blocked n/a test-arm64-arm64-xl-thunderx 1 build-check(1) blocked n/a test-arm64-arm64-xl-vhd 1 build-check(1) blocked n/a test-arm64-arm64-xl-xsm 1 build-check(1) blocked n/a test-armhf-armhf-libvirt 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-qcow2 1 build-check(1) blocked n/a test-armhf-armhf-libvirt-raw 1 build-check(1) blocked n/a test-armhf-armhf-xl 1 build-check(1) blocked n/a test-armhf-armhf-xl-arndale 1 build-check(1) blocked n/a test-armhf-armhf-xl-multivcpu 1 build-check(1) blocked n/a test-armhf-armhf-xl-rtds 1 build-check(1) blocked n/a 
test-armhf-armhf-xl-vhd 1 build-check(1) blocked n/a test-amd64-i386-qemuu-rhel6hvm-amd 1 build-check(1) blocked n/a test-amd64-i386-pair 1 build-check(1) blocked n/a test-amd64-i386-libvirt-xsm 1 build-check(1) blocked n/a test-amd64-i386-libvirt-raw 1 build-check(1) blocked n/a test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 1 build-check(1) blocked n/a test-amd64-i386-libvirt-pair 1 build-check(1) blocked n/a test-amd64-i386-libvirt 1 build-check(1) blocked n/a test-amd64-i386-freebsd10-i386 1 build-check(1) blocked n/a test-amd64-i386-freebsd10-amd64 1 build-check(1) blocked n/a test-amd64-coresched-i386-xl 1 build-check(1) blocked n/a test-amd64-coresched-amd64-xl 1 build-check(1) blocked n/a test-amd64-amd64-xl-xsm 1 build-check(1) blocked n/a build-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-shadow1 build-check(1) blocked n/a test-amd64-amd64-xl-rtds 1 build-check(1) blocked n/a build-arm64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ws16-amd64 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-win7-amd64 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-ovmf-amd64 1 build-check(1) blocked n/a build-armhf-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 1 build-check(1) blocked n/a build-i386-libvirt1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 1 build-check(1) blocked n/a test-amd64-amd64-dom0pvh-xl-amd 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 1 build-check(1) blocked n/a test-amd64-amd64-dom0pvh-xl-intel 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-debianhvm-amd64 1 build-check(1)blocked n/a test-amd64-amd64-libvirt 1 build-check(1) blocked n/a test-amd64-amd64-libvirt-pair 1 build-check(1) blocked n/a test-amd64-amd64-libvirt-qemuu-de
Re: [PATCH v4 21/34] arm64: Convert various functions to use ptdescs
On Mon, Jun 12, 2023 at 02:04:10PM -0700, Vishal Moola (Oracle) wrote: > As part of the conversions to replace pgtable constructor/destructors with > ptdesc equivalents, convert various page table functions to use ptdescs. > > Signed-off-by: Vishal Moola (Oracle) Acked-by: Catalin Marinas
Re: [PATCH v9 02/42] mm: Move pte/pmd_mkwrite() callers with no VMA to _novma()
On Tue, 2023-06-13 at 19:00 +0200, David Hildenbrand wrote: > On 13.06.23 18:19, Edgecombe, Rick P wrote: > > On Tue, 2023-06-13 at 10:44 +0300, Mike Rapoport wrote: > > > > Previous patches have done the first step, so next move the > > > > callers > > > > that > > > > don't have a VMA to pte_mkwrite_novma(). Also do the same for > > > > > > I hear x86 maintainers asking to drop "previous patches" ;-) > > > > > > Maybe > > > This is the second step of the conversion that moves the callers > > > ... > > > > Really? I've not heard that. Just a strong aversion to "this > > patch". > > I've got feedback to say "previous patches" and not "the last > > patch" so > > it doesn't get stale. I guess it could be "previous changes". > > Talking about patches make sense when discussing literal patches sent > to > the mailing list. In the git log, it's commit, and "future commits" > or > "follow-up work". > > Yes, we use "patches" all of the time in commit logs, especially when > we > include the cover letter in the commit message (as done frequently > in > the -mm tree). I think I'll switch over to talking about "changes". If you talk about commits it doesn't make as much sense when they are still just patches. Thanks.
Re: [PATCH v3 3/4] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as appropriate
On 13/06/2023 10:30 am, Jan Beulich wrote: > On 12.06.2023 18:13, Andrew Cooper wrote: >> @@ -593,15 +596,93 @@ static bool __init retpoline_calculations(void) >> return false; >> >> /* >> - * RSBA may be set by a hypervisor to indicate that we may move to a >> - * processor which isn't retpoline-safe. >> + * The meaning of the RSBA and RRSBA bits have evolved over time. The >> + * agreed upon meaning at the time of writing (May 2023) is thus: >> + * >> + * - RSBA (RSB Alternative) means that an RSB may fall back to an >> + * alternative predictor on underflow. Skylake uarch and later all >> have >> + * this property. Broadwell too, when running microcode versions >> prior >> + * to Jan 2018. >> + * >> + * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces >> + * tagging of predictions with the mode in which they were learned. >> So >> + * when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA). >> + * >> + * - CPUs are not expected to enumerate both RSBA and RRSBA. >> + * >> + * Some parts (Broadwell) are not expected to ever enumerate this >> + * behaviour directly. Other parts have differing enumeration with >> + * microcode version. Fix up Xen's idea, so we can advertise them >> safely >> + * to guests, and so toolstacks can level a VM safety for migration. >> + * >> + * The following states exist: >> + * >> + * | | RSBA | EIBRS | RRSBA | Notes | Action| >> + * |---+--+---+---++---| >> + * | 1 |0 | 0 | 0 | OK (older parts) | Maybe +RSBA | >> + * | 2 |0 | 0 | 1 | Broken | +RSBA, -RRSBA | >> + * | 3 |0 | 1 | 0 | OK (pre-Aug ucode) | +RRSBA| >> + * | 4 |0 | 1 | 1 | OK | | >> + * | 5 |1 | 0 | 0 | OK | | >> + * | 6 |1 | 0 | 1 | Broken | -RRSBA| >> + * | 7 |1 | 1 | 0 | Broken | -RSBA, +RRSBA | >> + * | 8 |1 | 1 | 1 | Broken | -RSBA | > You've kept the Action column as you had it originally, despite no longer > applying all the fixups. Wouldn't it make sense to mark those we don't do, > e.g. by enclosing in parentheses? Hmm, yes. 
How does this look? | | RSBA | EIBRS | RRSBA | Notes | Action (in principle) | |---+--+---+---++---| | 1 | 0 | 0 | 0 | OK (older parts) | Maybe +RSBA | | 2 | 0 | 0 | 1 | Broken | (+RSBA, -RRSBA) | | 3 | 0 | 1 | 0 | OK (pre-Aug ucode) | +RRSBA | | 4 | 0 | 1 | 1 | OK | | | 5 | 1 | 0 | 0 | OK | | | 6 | 1 | 0 | 1 | Broken | (-RRSBA) | | 7 | 1 | 1 | 0 | Broken | (-RSBA, +RRSBA) | | 8 | 1 | 1 | 1 | Broken | (-RSBA) | >> + * further investigation. >> + */ >> +if ( cpu_has_eibrs ? cpu_has_rsba /* Rows 7, 8 */ >> + : cpu_has_rrsba /* Rows 2, 6 */ ) >> +{ >> +printk(XENLOG_ERR >> + "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, >> EIBRS %u, RRSBA %u\n", >> + boot_cpu_data.x86, boot_cpu_data.x86_model, >> + boot_cpu_data.x86_mask, ucode_rev, >> + cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba); > Perhaps with adjustments (as you deem them sensible) > Reviewed-by: Jan Beulich Thanks. ~Andrew
[xen-unstable test] 181423: regressions - FAIL
flight 181423 xen-unstable real [real] http://logs.test-lab.xenproject.org/osstest/logs/181423/ Regressions :-( Tests which did not succeed and are blocking, including tests which could not be run: build-arm64-pvops 6 kernel-build fail REGR. vs. 181415 Tests which are failing intermittently (not blocking): test-amd64-i386-livepatch 7 xen-install fail in 181415 pass in 181423 test-amd64-amd64-xl-qemuu-win7-amd64 12 windows-install fail in 181415 pass in 181423 test-amd64-amd64-xl-qcow221 guest-start/debian.repeat fail pass in 181415 Tests which did not succeed, but are not blocking: test-arm64-arm64-examine 1 build-check(1) blocked n/a test-arm64-arm64-libvirt-raw 1 build-check(1) blocked n/a test-arm64-arm64-libvirt-xsm 1 build-check(1) blocked n/a test-arm64-arm64-xl 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit1 1 build-check(1) blocked n/a test-arm64-arm64-xl-credit2 1 build-check(1) blocked n/a test-arm64-arm64-xl-thunderx 1 build-check(1) blocked n/a test-arm64-arm64-xl-vhd 1 build-check(1) blocked n/a test-arm64-arm64-xl-xsm 1 build-check(1) blocked n/a test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stop fail blocked in 181415 test-amd64-amd64-xl-qcow222 guest-start.2 fail in 181415 blocked in 181423 test-arm64-arm64-xl-xsm 15 migrate-support-check fail in 181415 never pass test-arm64-arm64-xl-xsm 16 saverestore-support-check fail in 181415 never pass test-arm64-arm64-libvirt-xsm 15 migrate-support-check fail in 181415 never pass test-arm64-arm64-libvirt-xsm 16 saverestore-support-check fail in 181415 never pass test-arm64-arm64-xl-credit2 15 migrate-support-check fail in 181415 never pass test-arm64-arm64-xl-credit2 16 saverestore-support-check fail in 181415 never pass test-arm64-arm64-xl-credit1 15 migrate-support-check fail in 181415 never pass test-arm64-arm64-xl-credit1 16 saverestore-support-check fail in 181415 never pass test-arm64-arm64-xl-thunderx 15 migrate-support-check fail in 181415 never pass test-arm64-arm64-xl-thunderx 16 
saverestore-support-check fail in 181415 never pass test-arm64-arm64-xl 15 migrate-support-check fail in 181415 never pass test-arm64-arm64-xl 16 saverestore-support-check fail in 181415 never pass test-arm64-arm64-libvirt-raw 14 migrate-support-check fail in 181415 never pass test-arm64-arm64-libvirt-raw 15 saverestore-support-check fail in 181415 never pass test-arm64-arm64-xl-vhd 14 migrate-support-check fail in 181415 never pass test-arm64-arm64-xl-vhd 15 saverestore-support-check fail in 181415 never pass test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 181415 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 181415 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 181415 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 181415 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 181415 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 181415 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 12 debian-hvm-install fail like 181415 test-armhf-armhf-libvirt-raw 15 saverestore-support-checkfail like 181415 test-armhf-armhf-libvirt 16 saverestore-support-checkfail like 181415 test-amd64-i386-xl-vhd 21 guest-start/debian.repeatfail like 181415 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 181415 test-armhf-armhf-libvirt-qcow2 15 saverestore-support-check fail like 181415 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 181415 test-amd64-amd64-libvirt 15 migrate-support-checkfail never pass test-amd64-i386-xl-pvshim14 guest-start fail never pass test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-i386-libvirt 15 migrate-support-checkfail never pass test-amd64-i386-libvirt-xsm 15 migrate-support-checkfail never pass test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check fail never pass test-amd64-i386-libvirt-raw 14 migrate-support-checkfail 
never pass test-armhf-armhf-xl-rtds 15 migrate-support-checkfail never pass test-armhf-armhf-xl-rtds 16 saverestore-support-checkfail never pass test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 15 migrate-support-checkfail never pass test-armhf-armhf-xl-multivcpu 16 saverestore-support-checkfail never pass test-armhf-armhf-xl-vhd 14 migrate-support-checkfail never pass test-armhf-armhf-xl-vhd
Re: Functions _spin_lock_cb() and handle_ro_raz()
(+ Bertrand and Stefano) On 14/06/2023 14:08, Federico Serafini wrote: Hello everyone, Hi Federico, Let me start with a tip to help reaching the maintainers and getting a more timely answer. Xen-devel has a large volume of e-mails (still less than Linux :)). So some of us will have filter to try to classify the e-mails received. Commonly, all the e-mails where the person is in the CC/To list will go to the inbox. All the others will go a separate directory that may or may not be watched. Personally, I tend to glance in that directory, but I would not read all of them. So I would highly recommend to CC the maintainers/reviewers of the specific component. You can find them in MAINTAINERS at the root of the Xen repository. We also have script like scripts/get_maintainers.pl that can help you to find who to CC. If you pass '-f ', it will output the maintainers of that file. You can also use the script with patch to find all the maintainers to CC. Now back to the subject of the e-mail. I am working on the violations of MISRA C:2012 Rule 8.10, whose headline says: "An inline function shall be declared with the static storage class". For both ARM64 and X86_64 builds, function _spin_lock_cb() defined in spinlock.c violates the rule. Such function is declared in spinlock.h without the inline function specifier: are there any reasons to do this? What about solving the violation by moving the function definition in spinlock.h and declaring it as static inline? Jan answered it and sent a patch. So I will skip the reply for this one. The same happens also for the function handle_ro_raz() in the ARM64 build, declared in traps.h and defined in traps.c. I looked at the history and it is not clear to me why the 'inline' was added at first. That said, I don't see any value to ask the compiler to inline (which it would be free to ignore) the function. So I would suggest to send a patch to remove the 'inline'. Best regards, -- Julien Grall
[PATCH v4 01/15] cpufreq: Allow restricting to internal governors only
For hwp, the standard governors are not usable, and only the internal one is applicable. Add the cpufreq_governor_internal boolean to indicate when an internal governor, like hwp, will be used. This is set during presmp_initcall, so that it can suppress governor registration during initcall. Add an internal flag to struct cpufreq_governor to indicate such governors. This way, the unusable governors are not registered, so the internal one is the only one returned to userspace. This means incompatible governors won't be advertised to userspace. Signed-off-by: Jason Andryuk --- v4: Rework to use an internal flag Removed Jan's Ack since the approach is different. v3: Switch to initdata Add Jan Acked-by Commit message s/they/the/ typo Don't register hwp-internal when running non-hwp - Marek v2: Switch to "-internal" Add blank line in header --- xen/drivers/cpufreq/cpufreq.c | 7 +++ xen/include/acpi/cpufreq/cpufreq.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c index 2321c7dd07..cccf9a64c8 100644 --- a/xen/drivers/cpufreq/cpufreq.c +++ b/xen/drivers/cpufreq/cpufreq.c @@ -56,6 +56,7 @@ struct cpufreq_dom { }; static LIST_HEAD_READ_MOSTLY(cpufreq_dom_list_head); +bool __initdata cpufreq_governor_internal; struct cpufreq_governor *__read_mostly cpufreq_opt_governor; LIST_HEAD_READ_MOSTLY(cpufreq_governor_list); @@ -121,6 +122,12 @@ int __init cpufreq_register_governor(struct cpufreq_governor *governor) if (!governor) return -EINVAL; +if (cpufreq_governor_internal && !governor->internal) +return -EINVAL; + +if (!cpufreq_governor_internal && governor->internal) +return -EINVAL; + if (__find_governor(governor->name) != NULL) return -EEXIST; diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h index 35dcf21e8f..1c0872506a 100644 --- a/xen/include/acpi/cpufreq/cpufreq.h +++ b/xen/include/acpi/cpufreq/cpufreq.h @@ -106,6 +106,7 @@ struct cpufreq_governor { unsigned int event); 
bool_t (*handle_option)(const char *name, const char *value); struct list_head governor_list; +boolinternal; }; extern struct cpufreq_governor *cpufreq_opt_governor; @@ -114,6 +115,8 @@ extern struct cpufreq_governor cpufreq_gov_userspace; extern struct cpufreq_governor cpufreq_gov_performance; extern struct cpufreq_governor cpufreq_gov_powersave; +extern bool cpufreq_governor_internal; + extern struct list_head cpufreq_governor_list; extern int cpufreq_register_governor(struct cpufreq_governor *governor); -- 2.40.1
[PATCH v4 03/15] cpufreq: Export intel_feature_detect
Export feature_detect as intel_feature_detect so it can be re-used by HWP. Signed-off-by: Jason Andryuk Acked-by: Jan Beulich --- v4: Add Jan's Ack v3: Remove void * cast when calling intel_feature_detect v2: export intel_feature_detect with typed pointer Move intel_feature_detect to acpi/cpufreq/cpufreq.h since the declaration now contains struct cpufreq_policy *. --- xen/arch/x86/acpi/cpufreq/cpufreq.c | 8 ++-- xen/include/acpi/cpufreq/cpufreq.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c index 6c70d04395..f1cc473b4f 100644 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c @@ -339,9 +339,8 @@ static unsigned int cf_check get_cur_freq_on_cpu(unsigned int cpu) return extract_freq(get_cur_val(cpumask_of(cpu)), data); } -static void cf_check feature_detect(void *info) +void intel_feature_detect(struct cpufreq_policy *policy) { -struct cpufreq_policy *policy = info; unsigned int eax; eax = cpuid_eax(6); @@ -353,6 +352,11 @@ static void cf_check feature_detect(void *info) } } +static void cf_check feature_detect(void *info) +{ +intel_feature_detect(info); +} + static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, struct acpi_cpufreq_data *data) { diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h index e2e03b8bd7..a49efd1cb2 100644 --- a/xen/include/acpi/cpufreq/cpufreq.h +++ b/xen/include/acpi/cpufreq/cpufreq.h @@ -244,4 +244,6 @@ int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq); void cpufreq_dbs_timer_suspend(void); void cpufreq_dbs_timer_resume(void); +void intel_feature_detect(struct cpufreq_policy *policy); + #endif /* __XEN_CPUFREQ_PM_H__ */ -- 2.40.1
[PATCH v4 00/15] Intel Hardware P-States (HWP) support
Hi, This patch series adds Hardware-Controlled Performance States (HWP) for Intel processors to Xen. v2 was only partially reviewed, so v3 is mostly a reposting of v2. In v2 & v3, I think I addressed all comments for v1. I kept patch 11 "xenpm: Factor out a non-fatal cpuid_parse variant", with a v2 comment explaining why I keep it. v3 adds "xen/x86: Tweak PDC bits when using HWP". Qubes testing revealed an issue where enabling HWP can crash firmware code (maybe SMM). This requires a Linux change to get the PDC bits from Xen and pass them to ACPI. Roger has a patch [0] to set the PDC bits. Roger's 3 patch series was tested with "xen/x86: Tweak PDC bits when using HWP" on affected hardware and allowed proper operation. v4: There is a large amount of renaming from HWP/hwp to CPPC/cppc in the series. The driver remains hwp_ prefixed since it is dealing with the hardware interface. The sysctl, xc and xenpm interfaces were renamed to cppc to be the generic ACPI CPPC (Collaborative Processor Performance Control) interface. struct xen_get_cpufreq_para was re-organized in a binary compatible fashion to nest scaling governor options. This allows the cppc support to use uint32_t's for its parameters. HWP is now enabled with a top-level cpufreq=hwp option. It will fall back to cpufreq=xen if hwp is unavailable. This seems like the most user-friendly option. Since the user was trying to specify *some* cpufreq, we should give them the best that we can instead of disabling the functionality. "xenpm: Factor out a non-fatal cpuid_parse variant" was dropped. set-cpufreq-cppc expects either a cpu number or none specified, which implies all. Some patches were re-arranged - "xen/x86: Tweak PDC bits when using HWP" now comes immediately after "cpufreq: Add Hardware P-State (HWP) driver" The implementation of "cpufreq: Allow restricting to internal governors only " changed, so I removed Jan's Ack.
Previous cover letter: With HWP, the processor makes its own determinations for frequency selection, though users can set some parameters and preferences. There is also Turbo Boost which dynamically pushes the max frequency if possible. The existing governors don't work with HWP since they select frequencies and HWP doesn't expose those. Therefore a dummy hwp-internal governor is used that doesn't do anything. xenpm get-cpufreq-para is extended to show HWP parameters, and set-cpufreq-cppc is added to set them. A lightly loaded OpenXT laptop showed ~1W power savings according to powertop. A mostly idle Fedora system (dom0 only) showed a more modest power savings. This is for a 10th gen 6-core 1600 MHz base 4900 MHz max cpu. In the default balance mode, Turbo Boost doesn't exceed 4GHz. Tweaking the energy_perf preference with `xenpm set-cpufreq-para balance ene:64`, I've seen the CPU hit 4.7GHz before throttling down and bouncing around between 4.3 and 4.5 GHz. Curiously the other cores read ~4GHz when turbo boost takes effect. This was done after pinning all dom0 cores, and using taskset to pin to vCPU/pCPU 11 and running a bash tightloop. HWP defaults to disabled and running with the existing HWP configuration - it doesn't reconfigure by default. It can be enabled with cpufreq=hwp. Hardware Duty Cycling (HDC) is another feature to autonomously powerdown things. It defaults to enabled when HWP is enabled, but HDC can be disabled on the command line. cpufreq=xen:hwp,no-hdc I've only tested on 8th gen and 10th gen systems with activity window and energy_perf support. So the paths for CPUs lacking those features are untested. Fast MSR support was removed in v2. The model specific checking was not done properly, and I don't have hardware to test with. Since writes are expected to be infrequent, I just removed the code. This changes the sysctl_pm_op hypercall, so that wants review.
Regards, Jason [0] https://lore.kernel.org/xen-devel/20221121102113.41893-3-roger@citrix.com/ Jason Andryuk (15): cpufreq: Allow restricting to internal governors only cpufreq: Add perf_freq to cpuinfo cpufreq: Export intel_feature_detect xen/sysctl: Nest cpufreq scaling options pmstat&xenpm: Re-arrange for cpufreq union cpufreq: Add Hardware P-State (HWP) driver xen/x86: Tweak PDC bits when using HWP xenpm: Change get-cpufreq-para output for hwp cpufreq: Export HWP parameters to userspace as CPPC libxc: Include cppc_para in definitions xenpm: Print HWP/CPPC parameters xen: Add SET_CPUFREQ_HWP xen_sysctl_pm_op libxc: Add xc_set_cpufreq_cppc xenpm: Add set-cpufreq-cppc subcommand CHANGELOG: Add Intel HWP entry CHANGELOG.md | 2 +- docs/misc/xen-command-line.pandoc | 9 +- tools/include/xenctrl.h | 28 +- tools/libs/ctrl/xc_pm.c | 35 +- tools/misc/xenpm.c| 385 +++-- xen/arch/x86/acpi/cpufreq/Makefile| 1 + xen/arch/x86/acpi/cpufreq/cpufreq.c | 15 +- xen/arch
[PATCH v4 02/15] cpufreq: Add perf_freq to cpuinfo
acpi-cpufreq scales the aperf/mperf measurements by max_freq, but HWP needs to scale by base frequency. Setting max_freq to base_freq "works" but the code is not obvious, and returning values to userspace is tricky. Add an additional perf_freq member which is used for scaling aperf/mperf measurements. Signed-off-by: Jason Andryuk Acked-by: Jan Beulich --- v3: Add Jan's Ack I don't like this, but it seems the best way to re-use the common aperf/mperf code. The other option would be to add wrappers that then do the acpi vs. hwp scaling. --- xen/arch/x86/acpi/cpufreq/cpufreq.c | 2 +- xen/drivers/cpufreq/utility.c | 1 + xen/include/acpi/cpufreq/cpufreq.h | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c index 2e0067fbe5..6c70d04395 100644 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c @@ -316,7 +316,7 @@ unsigned int get_measured_perf(unsigned int cpu, unsigned int flag) else perf_percent = 0; -return policy->cpuinfo.max_freq * perf_percent / 100; +return policy->cpuinfo.perf_freq * perf_percent / 100; } static unsigned int cf_check get_cur_freq_on_cpu(unsigned int cpu) diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c index 9eb7ecedcd..6831f62851 100644 --- a/xen/drivers/cpufreq/utility.c +++ b/xen/drivers/cpufreq/utility.c @@ -236,6 +236,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, policy->min = policy->cpuinfo.min_freq = min_freq; policy->max = policy->cpuinfo.max_freq = max_freq; +policy->cpuinfo.perf_freq = max_freq; policy->cpuinfo.second_max_freq = second_max_freq; if (policy->min == ~0) diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h index 1c0872506a..e2e03b8bd7 100644 --- a/xen/include/acpi/cpufreq/cpufreq.h +++ b/xen/include/acpi/cpufreq/cpufreq.h @@ -37,6 +37,9 @@ extern struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS]; struct 
cpufreq_cpuinfo { unsigned intmax_freq; unsigned intsecond_max_freq;/* P1 if Turbo Mode is on */ +unsigned intperf_freq; /* Scaling freq for aperf/mpref. + acpi-cpufreq uses max_freq, but HWP uses + base_freq.*/ unsigned intmin_freq; unsigned inttransition_latency; /* in 10^(-9) s = nanoseconds */ }; -- 2.40.1
[PATCH v4 04/15] xen/sysctl: Nest cpufreq scaling options
Add a union and struct so that most of the scaling variables of struct xen_get_cpufreq_para are within a binary-compatible layout. This allows cppc_para to live in the larger union and use uint32_ts - struct xen_cppc_para will be 10 uint32_t's. The new scaling struct is 3 * uint32_t + 16 bytes CPUFREQ_NAME_LEN + 4 * uint32_t for xen_ondemand = 11 uint32_t. That means the old size is retained, int32_t turbo_enabled doesn't move and it's binary compatible. Signed-off-by: Jason Andryuk --- tools/include/xenctrl.h | 22 +- tools/libs/ctrl/xc_pm.c | 5 - tools/misc/xenpm.c | 24 xen/drivers/acpi/pmstat.c | 27 ++- xen/include/public/sysctl.h | 22 +- 5 files changed, 52 insertions(+), 48 deletions(-) diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h index dba33d5d0f..8aedb952a0 100644 --- a/tools/include/xenctrl.h +++ b/tools/include/xenctrl.h @@ -1909,16 +1909,20 @@ struct xc_get_cpufreq_para { uint32_t cpuinfo_cur_freq; uint32_t cpuinfo_max_freq; uint32_t cpuinfo_min_freq; -uint32_t scaling_cur_freq; - -char scaling_governor[CPUFREQ_NAME_LEN]; -uint32_t scaling_max_freq; -uint32_t scaling_min_freq; - -/* for specific governor */ union { -xc_userspace_t userspace; -xc_ondemand_t ondemand; +struct { +uint32_t scaling_cur_freq; + +char scaling_governor[CPUFREQ_NAME_LEN]; +uint32_t scaling_max_freq; +uint32_t scaling_min_freq; + +/* for specific governor */ +union { +xc_userspace_t userspace; +xc_ondemand_t ondemand; +} u; +} s; } u; int32_t turbo_enabled; diff --git a/tools/libs/ctrl/xc_pm.c b/tools/libs/ctrl/xc_pm.c index c3a9864bf7..f92542eaf7 100644 --- a/tools/libs/ctrl/xc_pm.c +++ b/tools/libs/ctrl/xc_pm.c @@ -265,15 +265,10 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid, user_para->cpuinfo_cur_freq = sys_para->cpuinfo_cur_freq; user_para->cpuinfo_max_freq = sys_para->cpuinfo_max_freq; user_para->cpuinfo_min_freq = sys_para->cpuinfo_min_freq; -user_para->scaling_cur_freq = sys_para->scaling_cur_freq; -user_para->scaling_max_freq = 
sys_para->scaling_max_freq; -user_para->scaling_min_freq = sys_para->scaling_min_freq; user_para->turbo_enabled= sys_para->turbo_enabled; memcpy(user_para->scaling_driver, sys_para->scaling_driver, CPUFREQ_NAME_LEN); -memcpy(user_para->scaling_governor, -sys_para->scaling_governor, CPUFREQ_NAME_LEN); /* copy to user_para no matter what cpufreq governor */ BUILD_BUG_ON(sizeof(((struct xc_get_cpufreq_para *)0)->u) != diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c index 1bb6187e56..ee8ce5d5f2 100644 --- a/tools/misc/xenpm.c +++ b/tools/misc/xenpm.c @@ -730,39 +730,39 @@ static void print_cpufreq_para(int cpuid, struct xc_get_cpufreq_para *p_cpufreq) printf("scaling_avail_gov: %s\n", p_cpufreq->scaling_available_governors); -printf("current_governor : %s\n", p_cpufreq->scaling_governor); -if ( !strncmp(p_cpufreq->scaling_governor, +printf("current_governor : %s\n", p_cpufreq->u.s.scaling_governor); +if ( !strncmp(p_cpufreq->u.s.scaling_governor, "userspace", CPUFREQ_NAME_LEN) ) { printf(" userspace specific :\n"); printf("scaling_setspeed : %u\n", - p_cpufreq->u.userspace.scaling_setspeed); + p_cpufreq->u.s.u.userspace.scaling_setspeed); } -else if ( !strncmp(p_cpufreq->scaling_governor, +else if ( !strncmp(p_cpufreq->u.s.scaling_governor, "ondemand", CPUFREQ_NAME_LEN) ) { printf(" ondemand specific :\n"); printf("sampling_rate: max [%u] min [%u] cur [%u]\n", - p_cpufreq->u.ondemand.sampling_rate_max, - p_cpufreq->u.ondemand.sampling_rate_min, - p_cpufreq->u.ondemand.sampling_rate); + p_cpufreq->u.s.u.ondemand.sampling_rate_max, + p_cpufreq->u.s.u.ondemand.sampling_rate_min, + p_cpufreq->u.s.u.ondemand.sampling_rate); printf("up_threshold : %u\n", - p_cpufreq->u.ondemand.up_threshold); + p_cpufreq->u.s.u.ondemand.up_threshold); } printf("scaling_avail_freq :"); for ( i = 0; i < p_cpufreq->freq_num; i++ ) if ( p_cpufreq->scaling_available_frequencies[i] == - p_cpufreq->scaling_cur_freq ) + p_cpufreq->u.s.scaling_cur_freq ) printf(" *%d", 
p_cpufreq->scaling_available_frequencies[i]); else printf(" %d", p_cpufreq->scaling_available_frequencies[i]); printf("\n"); printf("scaling frequency: max
[PATCH v4 05/15] pmstat&xenpm: Re-arrange for cpufreq union
Move some code around now that common xen_sysctl_pm_op get_para fields are together. In particular, the scaling governor information like scaling_available_governors is inside the union, so it is not always available. With that, gov_num may be 0, so bounce buffer handling needs to be modified. scaling_governor won't be filled for hwp, so this will simplify the change when it is introduced. Signed-off-by: Jason Andryuk --- tools/libs/ctrl/xc_pm.c | 12 tools/misc/xenpm.c| 3 ++- xen/drivers/acpi/pmstat.c | 32 +--- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/tools/libs/ctrl/xc_pm.c b/tools/libs/ctrl/xc_pm.c index f92542eaf7..19fe1a79dd 100644 --- a/tools/libs/ctrl/xc_pm.c +++ b/tools/libs/ctrl/xc_pm.c @@ -221,7 +221,7 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid, { if ( (!user_para->affected_cpus)|| (!user_para->scaling_available_frequencies)|| - (!user_para->scaling_available_governors) ) + (user_para->gov_num && !user_para->scaling_available_governors) ) { errno = EINVAL; return -1; @@ -230,12 +230,15 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid, goto unlock_1; if ( xc_hypercall_bounce_pre(xch, scaling_available_frequencies) ) goto unlock_2; -if ( xc_hypercall_bounce_pre(xch, scaling_available_governors) ) +if ( user_para->gov_num && + xc_hypercall_bounce_pre(xch, scaling_available_governors) ) goto unlock_3; set_xen_guest_handle(sys_para->affected_cpus, affected_cpus); set_xen_guest_handle(sys_para->scaling_available_frequencies, scaling_available_frequencies); -set_xen_guest_handle(sys_para->scaling_available_governors, scaling_available_governors); +if ( user_para->gov_num ) +set_xen_guest_handle(sys_para->scaling_available_governors, + scaling_available_governors); } sysctl.cmd = XEN_SYSCTL_pm_op; @@ -278,7 +281,8 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid, } unlock_4: -xc_hypercall_bounce_post(xch, scaling_available_governors); +if ( user_para->gov_num ) +xc_hypercall_bounce_post(xch, 
scaling_available_governors); unlock_3: xc_hypercall_bounce_post(xch, scaling_available_frequencies); unlock_2: diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c index ee8ce5d5f2..1c474c3b59 100644 --- a/tools/misc/xenpm.c +++ b/tools/misc/xenpm.c @@ -811,7 +811,8 @@ static int show_cpufreq_para_by_cpuid(xc_interface *xc_handle, int cpuid) ret = -ENOMEM; goto out; } -if (!(p_cpufreq->scaling_available_governors = +if (p_cpufreq->gov_num && +!(p_cpufreq->scaling_available_governors = malloc(p_cpufreq->gov_num * CPUFREQ_NAME_LEN * sizeof(char { fprintf(stderr, diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c index f5a9ac3f1a..57359c21d8 100644 --- a/xen/drivers/acpi/pmstat.c +++ b/xen/drivers/acpi/pmstat.c @@ -239,11 +239,24 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op) if ( ret ) return ret; +op->u.get_para.cpuinfo_cur_freq = +cpufreq_driver.get ? cpufreq_driver.get(op->cpuid) : policy->cur; +op->u.get_para.cpuinfo_max_freq = policy->cpuinfo.max_freq; +op->u.get_para.cpuinfo_min_freq = policy->cpuinfo.min_freq; +op->u.get_para.turbo_enabled = cpufreq_get_turbo_status(op->cpuid); + +if ( cpufreq_driver.name[0] ) +strlcpy(op->u.get_para.scaling_driver, +cpufreq_driver.name, CPUFREQ_NAME_LEN); +else +strlcpy(op->u.get_para.scaling_driver, "Unknown", CPUFREQ_NAME_LEN); + if ( !(scaling_available_governors = xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) ) return -ENOMEM; -if ( (ret = read_scaling_available_governors(scaling_available_governors, -gov_num * CPUFREQ_NAME_LEN * sizeof(char))) ) +if ( (ret = read_scaling_available_governors( +scaling_available_governors, +gov_num * CPUFREQ_NAME_LEN * sizeof(char))) ) { xfree(scaling_available_governors); return ret; @@ -254,26 +267,16 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op) if ( ret ) return ret; -op->u.get_para.cpuinfo_cur_freq = -cpufreq_driver.get ? 
cpufreq_driver.get(op->cpuid) : policy->cur; -op->u.get_para.cpuinfo_max_freq = policy->cpuinfo.max_freq; -op->u.get_para.cpuinfo_min_freq = policy->cpuinfo.min_freq; - op->u.get_para.u.s.scaling_cur_freq = policy->cur; op->u.get_para.u.s.scaling_max_freq = policy->max; op->u.get_para.u.s.scaling_min_freq = policy->min; -if ( cpufreq_driver.name[0] ) -strlcpy(op->u.get_para.scaling_driver, -cpu