[PATCH 2/3] powerpc: replace #include <asm/export.h> with #include <linux/export.h>
Commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") deprecated , which is now a wrapper of . Replace #include with #include . After all the lines are converted, and will be removed. Signed-off-by: Masahiro Yamada --- arch/powerpc/kernel/epapr_hcalls.S | 2 +- arch/powerpc/kernel/fpu.S | 2 +- arch/powerpc/kernel/misc.S | 2 +- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/misc_64.S | 2 +- arch/powerpc/kernel/tm.S| 2 +- arch/powerpc/kernel/trace/ftrace_low.S | 2 +- arch/powerpc/kernel/ucall.S | 2 +- arch/powerpc/kernel/vector.S| 2 +- arch/powerpc/kvm/book3s_64_entry.S | 2 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- arch/powerpc/kvm/tm.S | 2 +- arch/powerpc/lib/checksum_32.S | 2 +- arch/powerpc/lib/checksum_64.S | 2 +- arch/powerpc/lib/copy_32.S | 2 +- arch/powerpc/lib/copy_mc_64.S | 2 +- arch/powerpc/lib/copypage_64.S | 2 +- arch/powerpc/lib/copyuser_64.S | 2 +- arch/powerpc/lib/hweight_64.S | 2 +- arch/powerpc/lib/mem_64.S | 2 +- arch/powerpc/lib/memcmp_32.S| 2 +- arch/powerpc/lib/memcmp_64.S| 2 +- arch/powerpc/lib/memcpy_64.S| 2 +- arch/powerpc/lib/string.S | 2 +- arch/powerpc/lib/string_32.S| 2 +- arch/powerpc/lib/string_64.S| 2 +- arch/powerpc/lib/strlen_32.S| 2 +- arch/powerpc/mm/book3s32/hash_low.S | 2 +- arch/powerpc/sysdev/dcr-low.S | 2 +- 29 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 033116e465d0..1a9b5ae8ccb2 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -3,6 +3,7 @@ * Copyright (C) 2012 Freescale Semiconductor, Inc. 
*/ +#include #include #include #include @@ -12,7 +13,6 @@ #include #include #include -#include #ifndef CONFIG_PPC64 /* epapr_ev_idle() was derived from e500_idle() */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index f71f2bbd4de6..6a9acfb690c9 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -9,6 +9,7 @@ *Copyright (C) 1997 Dan Malek (dma...@jlc.net). */ +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index fb7de3543c03..29e1440d14cc 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -10,11 +10,11 @@ * * setjmp/longjmp code by Paul Mackerras. */ +#include #include #include #include #include -#include .text diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index daf8f87d2372..2eabb15687a6 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -8,6 +8,7 @@ * */ +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include .text diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 2c9ac70aaf0c..1a8cdafd68e8 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,7 @@ * PPC64 updates by Dave Engebretsen (engeb...@us.ibm.com) */ +#include #include #include #include @@ -23,7 +24,6 @@ #include #include #include -#include #include .text diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 9feab5e0485b..a9cd6507163a 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -6,13 +6,13 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. 
*/ +#include #include #include #include #include #include #include -#include #include #ifdef CONFIG_VSX diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S index 294d1e05958a..5e271f87f799 100644 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -3,12 +3,12 @@ * Split from entry_64.S */ +#include #include #include #include #include #include -#include #ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S index 07296bc39166..80a1f9a4300a 100644 --- a/arch/powerpc/kernel/ucall.S +++ b/arch/powerpc/kernel/ucall.S @@ -5,8 +5,8 @@ * Copyright 2019, IBM Corporation. * */ +#include #include -#include _GLOBAL(ucall_norets) EXPORT_SYMBOL_GPL(ucall_norets) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index fcc0ad6d9c7b..4094e4c4c77a 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include
[PATCH 1/3] powerpc: remove unneeded #include <asm/export.h>
There is no EXPORT_SYMBOL line there, hence #include is unneeded. Signed-off-by: Masahiro Yamada --- arch/powerpc/kernel/entry_32.S | 1 - arch/powerpc/kernel/head_40x.S | 1 - arch/powerpc/kernel/head_44x.S | 1 - arch/powerpc/kernel/head_64.S | 1 - arch/powerpc/kernel/head_85xx.S | 1 - arch/powerpc/kernel/head_8xx.S | 1 - arch/powerpc/kernel/head_book3s_32.S| 1 - arch/powerpc/kernel/trace/ftrace_64_pg.S| 1 - arch/powerpc/kernel/trace/ftrace_mprofile.S | 1 - 9 files changed, 9 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index fe27d41f9a3d..9692acb0361f 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 3f68a1624646..b32e7b2ebdcf 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -38,7 +38,6 @@ #include #include #include -#include #include "head_32.h" diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 63a85c16fef4..a3197c9f721c 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -35,7 +35,6 @@ #include #include #include -#include #include #include "head_booke.h" diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 6440b1bb332a..4690c219bfa4 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -40,7 +40,6 @@ #include #include #include -#include #include #ifdef CONFIG_PPC_BOOK3S #include diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index fdbee1093e2b..97e9ea0c7297 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -40,7 +40,6 @@ #include #include #include -#include #include #include "head_booke.h" diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a79751e05781..647b0b445e89 100644 
--- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c51f28b5abc0..6764b98ca360 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S index 6708e24db0ab..cdbcb5a0783b 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -8,7 +8,6 @@ #include #include #include -#include _GLOBAL_TOC(ftrace_caller) lbz r3, PACA_FTRACE_ENABLED(r13) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 1f7d86de1538..600406716d66 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include -- 2.39.2
[PATCH 3/3] powerpc: remove <asm/export.h>
All *.S files under arch/powerpc/ have been converted to include <linux/export.h> instead of <asm/export.h>. Remove <asm/export.h>. Signed-off-by: Masahiro Yamada --- arch/powerpc/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 419319c4963c..61a8dcd7 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_spu.h generic-y += agp.h -generic-y += export.h generic-y += kvm_types.h generic-y += mcs_spinlock.h generic-y += qrwlock.h -- 2.39.2
Re: [PATCH net-next v2 10/10] net: fs_enet: Use cpm_muram_xxx() functions instead of cpm_dpxxx() macros
On Fri, Aug 04, 2023 at 03:30:20PM +0200, Christophe Leroy wrote: > cpm_dpxxx() macros are now always referring to cpm_muram_xxx() fonctions nit: fonctions -> functions Thanks Christophe, This minor nit notwithstanding, this series looks good to me. I'll send a reviewed-by tag for the whole series in response to the cover letter. ...
Re: [PATCH net-next v2 00/10] net: fs_enet: Driver cleanup
On Fri, Aug 04, 2023 at 03:30:10PM +0200, Christophe Leroy wrote: > Over the years, platform and driver initialisation have evolved into > more generic ways, and driver or platform specific stuff has gone > away, leaving stale objects behind. > > This series aims at cleaning all that up for fs_enet ethernet driver. > > Changes in v2: > - Remove a trailing whitespace in the old struct moved in patch 7. > - Include powerpc people and list that I forgot when sending v1 > (and Rob as expected by Patchwork for patch 6, not sure why) Thanks, this looks good to me. For the series, Reviewed-by: Simon Horman
Re: [PATCH] floppy: ERROR: that open brace { should be on the previous line
On Fri, Aug 04, 2023 at 04:05:09PM +, Christophe Leroy wrote: > Hello, > > Le 20/07/2023 à 12:17, zhangyongle...@208suo.com a écrit : > > [Vous ne recevez pas souvent de courriers de zhangyongle...@208suo.com. > > Découvrez pourquoi ceci est important ? > > https://aka.ms/LearnAboutSenderIdentification ] > > > > Fix twoce occurrences of the checkpatch.pl error: > > ERROR: that open brace { should be on the previous line > > > Can you please explain the purpose of those changes ? Do you use some > tools that get disturbed by such cosmetic errors ? Otherwise what is > your reason ? Hi, 208suo.com people do checkpatch fixes (that is, they run scripts/checkpatch.pl -f then try to make the script happy). Steven warned them not to submit such patches again [1] but they keep spamming maintainers with checkpatch patches (ignoring the review warning). I voiced this concern when reviewing one of their patches and Jani replied that such one-way interaction with the kernel community is detrimental [2]. The exact same situation happened last year involving developers from cdjrlc.com domain. They also did trivial patches, including mostly (and notoriously known for) redundant word stripping. While some of these patches were accepted, others were not with reviews requesting changes in v2, yet they also ignored reviews. In fact, in the early waves of 208suo.com patches, they used the same email infra as 208suo.com people and they sent patches as HTML emails (which were rejected by mailing lists obviously) so that the latter people have to send their patches on their behalf (but corrupted since 208suo.com people used Roundcube instead of git-send-email(1)). Regarding 208suo.com's mail infra, after I pointed out this [3], they changed the infra so that patches sent didn't get corrupted. Thus, they did listen with regard to tooling and infra changes, but they deliberately don't answer code reviews. Thanks.
[1]: https://lore.kernel.org/lkml/20230720134501.01f9f...@gandalf.local.home/ [2]: https://lore.kernel.org/lkml/87cz07vvwu@intel.com/ [3]: https://lore.kernel.org/lkml/zjk7sc4i+mk98k%...@debian.me/ > > We don't accept such standelone minor cosmetic changes at the first > place because it looks like a waste of time. PS: And in fact, complicating stable backports... -- An old man doll... just what I always wanted! - Clara signature.asc Description: PGP signature
[PATCH v2] powerpc: Use shared font data
From: "Dr. David Alan Gilbert" PowerPC has a 'btext' font used for the console which is almost identical to the shared font_sun8x16, so use it rather than duplicating the data. They were actually identical until about a decade ago when commit bcfbeecea11c ("drivers: console: font_: Change a glyph from "broken bar" to "vertical line"") which changed the | in the shared font to be a solid bar rather than a broken bar. That's the only difference. This was originally spotted by PMD which noticed that sparc does the same thing with the same data, and they also share a bunch of functions to manipulate the data. I've previously posted a near identical patch for sparc. One difference I notice in PowerPC is that there are a bunch of compile options for the .c files for the early code to avoid a bunch of security compilation features; it's not clear to me if this is a problem for this font data. Tested very lightly with a boot without FS in qemu. v2 Added 'select FONT_SUPPORT' (to stop modconfig causing the font to be linked into a module rather than the main kernel) Added 'select FONTS' to satisfy requirements in lib/fonts Signed-off-by: Dr. David Alan Gilbert --- arch/powerpc/Kconfig.debug | 3 + arch/powerpc/kernel/btext.c | 360 +--- 2 files changed, 9 insertions(+), 354 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 2a54fadbeaf51..521c4baf30e88 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -147,6 +147,9 @@ config BDI_SWITCH config BOOTX_TEXT bool "Support for early boot text console (BootX or OpenFirmware only)" depends on PPC_BOOK3S + select FONT_SUN8x16 + select FONT_SUPPORT + select FONTS help Say Y here to see progress messages from the boot firmware in text mode. Requires either BootX or Open Firmware. 
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 19e46fd623b0d..7f63f1cdc6c39 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -41,10 +42,6 @@ static unsigned char *logicalDisplayBase __force_data; unsigned long disp_BAT[2] __initdata = {0, 0}; -#define cmapsz (16*256) - -static unsigned char vga_font[cmapsz]; - static int boot_text_mapped __force_data; extern void rmci_on(void); @@ -407,7 +404,7 @@ static unsigned int expand_bits_16[4] = { }; -static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +static void draw_byte_32(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xUL; @@ -428,7 +425,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_16(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xUL; @@ -446,7 +443,7 @@ static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_8(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0x0F0F0F0FUL; @@ -465,7 +462,8 @@ static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) static noinline void draw_byte(unsigned char c, long locX, long locY) { unsigned char *base = calc_base(locX << 3, locY << 4); - unsigned char *font = &vga_font[((unsigned int)c) * 16]; + unsigned int font_index = c * 16; + const unsigned char *font = font_sun_8x16.data + font_index; int rb = dispDeviceRowBytes; rmci_maybe_on(); @@ -583,349 +581,3 @@ void __init udbg_init_btext(void) */ udbg_putc = btext_drawchar; } - -static unsigned char vga_font[cmapsz] = { -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, -0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, -0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, -0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, -0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0x00, 0x00
[PATCH v3 0/6] KVM: PPC: Nested APIv2 guest support
A nested-HV API for PAPR has been developed based on the KVM-specific nested-HV API that is upstream in Linux/KVM and QEMU. The PAPR API had to break compatibility to accommodate implementation in other hypervisors and partitioning firmware. The existing KVM-specific API will be known as the Nested APIv1 and the PAPR API will be known as the Nested APIv2. The control flow and interrupt processing between L0, L1, and L2 in the Nested APIv2 are conceptually unchanged. Where Nested APIv1 is almost stateless, the Nested APIv2 is stateful, with the L1 registering L2 virtual machines and vCPUs with the L0. Supervisor-privileged register switching duty is now the responsibility of the L0, which holds canonical L2 register state and handles all switching. This new register handling motivates the "getters and setters" wrappers to assist in syncing the L2's state in the L1 and the L0. Broadly, the new hcalls will be used for creating and managing guests by a regular partition in the following way: - L1 and L0 negotiate capabilities with H_GUEST_{G,S}ET_CAPABILITIES - L1 requests the L0 create a L2 with H_GUEST_CREATE and receives a handle to use in future hcalls - L1 requests the L0 create a L2 vCPU with H_GUEST_CREATE_VCPU - L1 sets up the L2 using H_GUEST_SET and the H_GUEST_VCPU_RUN input buffer - L1 requests the L0 run the L2 vCPU using H_GUEST_VCPU_RUN - L2 returns to L1 with an exit reason and L1 reads the H_GUEST_VCPU_RUN output buffer populated by the L0 - L1 handles the exit using H_GET_STATE if necessary - L1 reruns L2 vCPU with H_GUEST_VCPU_RUN - L1 frees the L2 in the L0 with H_GUEST_DELETE Further details are available in Documentation/powerpc/kvm-nested.rst. This series adds KVM support for using this hcall interface as a regular PAPR partition, i.e. the L1. It does not add support for running as the L0. The new hcalls have been implemented in the spapr qemu model for testing.
This is available at https://github.com/planetharsh/qemu/tree/upstream-0714-kop There are scripts available to assist in setting up an environment for testing nested guests at https://github.com/iamjpn/kvm-powervm-test A tree with this series is available at https://github.com/iamjpn/linux/tree/features/kvm-nestedv2-v3 Thanks to Amit Machhiwal, Kautuk Consul, Vaibhav Jain, Michael Neuling, Shivaprasad Bhat, Harsh Prateek Bora, Paul Mackerras and Nicholas Piggin. Change overview in v3: - KVM: PPC: Use getters and setters for vcpu register state - Do not add a helper for pvr - Use an expression when declaring variable in case - Squash in all getters and setters - Pass vector registers by reference - KVM: PPC: Rename accessor generator macros - New to series - KVM: PPC: Add helper library for Guest State Buffers - Use EXPORT_SYMBOL_GPL() - Use the kvmppc namespace - Move kvmppc_gsb_reset() out of kvmppc_gsm_fill_info() - Comments for GSID elements - Pass vector elements by reference - Remove generic put and get functions - KVM: PPC: Book3s HV: Hold LPIDs in an unsigned long - New to series - KVM: PPC: Add support for nestedv2 guests - Use EXPORT_SYMBOL_GPL() - Change to kvmhv_nestedv2 namespace - Make kvmhv_enable_nested() return -ENODEV on NESTEDv2 L1 hosts - s/kvmhv_on_papr/kvmhv_is_nestedv2/ - mv book3s_hv_papr.c book3s_hv_nestedv2.c - Handle shared regs without a guest state id in the same wrapper - Use a static key for API version - Add a positive test for NESTEDv1 - Give the amor a static value - s/struct kvmhv_nestedv2_host/struct kvmhv_nestedv2_io/ - Propagate failure in kvmhv_vcpu_entry_nestedv2() - WARN if getters and setters fail - Propagate failure from kvmhv_nestedv2_parse_output() - Replace delay with sleep in plpar_guest_{create,delete,create_vcpu}() - Add logical PVR handling - Replace kvmppc_gse_{get,put} with specific version - docs: powerpc: Document nested KVM on POWER - Fix typos Change overview in v2: - Rebase on top of kvm ppc prefix instruction
support - Make documentation an individual patch - Move guest state buffer files from arch/powerpc/lib/ to arch/powerpc/kvm/ - Use kunit for testing guest state buffer - Fix some build errors - Change HEIR element from 4 bytes to 8 bytes Previous revisions: - v1: https://lore.kernel.org/linuxppc-dev/20230508072332.2937883-1-...@linux.vnet.ibm.com/ - v2: https://lore.kernel.org/linuxppc-dev/20230605064848.12319-1-...@linux.vnet.ibm.com/ Jordan Niethe (5): KVM: PPC: Use getters and setters for vcpu register state KVM: PPC: Rename accessor generator macros KVM: PPC: Add helper library for Guest State Buffers KVM: PPC: Book3s HV: Hold LPIDs in an unsigned long KVM: PPC: Add support for nestedv2 guests Michael Neuling (1): docs: powerpc: Document nested KVM on POWER Documentation/powerpc/
[PATCH v3 1/6] KVM: PPC: Use getters and setters for vcpu register state
There are already some getter and setter functions used for accessing vcpu register state, e.g. kvmppc_get_pc(). There are also more complicated examples that are generated by macros like kvmppc_get_sprg0() which are generated by the SHARED_SPRNG_WRAPPER() macro. In the new PAPR "Nestedv2" API for nested guest partitions the L1 is required to communicate with the L0 to modify and read nested guest state. Prepare to support this by replacing direct accesses to vcpu register state with wrapper functions. Follow the existing pattern of using macros to generate individual wrappers. These wrappers will be augmented for supporting Nestedv2 guests later. Signed-off-by: Gautam Menghani Signed-off-by: Jordan Niethe --- v3: - Do not add a helper for pvr - Use an expression when declaring variable in case - Squash in all getters and setters - Guatam: Pass vector registers by reference --- arch/powerpc/include/asm/kvm_book3s.h | 123 +- arch/powerpc/include/asm/kvm_booke.h | 10 ++ arch/powerpc/kvm/book3s.c | 38 ++--- arch/powerpc/kvm/book3s_64_mmu_hv.c| 4 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 9 +- arch/powerpc/kvm/book3s_64_vio.c | 4 +- arch/powerpc/kvm/book3s_hv.c | 220 + arch/powerpc/kvm/book3s_hv.h | 58 +++ arch/powerpc/kvm/book3s_hv_builtin.c | 10 +- arch/powerpc/kvm/book3s_hv_p9_entry.c | 4 +- arch/powerpc/kvm/book3s_hv_ras.c | 5 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c| 8 +- arch/powerpc/kvm/book3s_hv_rm_xics.c | 4 +- arch/powerpc/kvm/book3s_xive.c | 9 +- arch/powerpc/kvm/emulate_loadstore.c | 2 +- arch/powerpc/kvm/powerpc.c | 76 - 16 files changed, 395 insertions(+), 189 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index bbf5e2c5fe09..1a7e837ea2d5 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -392,6 +392,16 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) return vcpu->arch.regs.nip; } +static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 
val) +{ + vcpu->arch.pid = val; +} + +static inline u32 kvmppc_get_pid(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pid; +} + static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu); static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) { @@ -403,10 +413,121 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return vcpu->arch.fault_dar; } +static inline u64 kvmppc_get_fpr(struct kvm_vcpu *vcpu, int i) +{ + return vcpu->arch.fp.fpr[i][TS_FPROFFSET]; +} + +static inline void kvmppc_set_fpr(struct kvm_vcpu *vcpu, int i, u64 val) +{ + vcpu->arch.fp.fpr[i][TS_FPROFFSET] = val; +} + +static inline u64 kvmppc_get_fpscr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fp.fpscr; +} + +static inline void kvmppc_set_fpscr(struct kvm_vcpu *vcpu, u64 val) +{ + vcpu->arch.fp.fpscr = val; +} + + +static inline u64 kvmppc_get_vsx_fpr(struct kvm_vcpu *vcpu, int i, int j) +{ + return vcpu->arch.fp.fpr[i][j]; +} + +static inline void kvmppc_set_vsx_fpr(struct kvm_vcpu *vcpu, int i, int j, + u64 val) +{ + vcpu->arch.fp.fpr[i][j] = val; +} + +#ifdef CONFIG_VSX +static inline void kvmppc_get_vsx_vr(struct kvm_vcpu *vcpu, int i, vector128 *v) +{ + *v = vcpu->arch.vr.vr[i]; +} + +static inline void kvmppc_set_vsx_vr(struct kvm_vcpu *vcpu, int i, +vector128 *val) +{ + vcpu->arch.vr.vr[i] = *val; +} + +static inline u32 kvmppc_get_vscr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vr.vscr.u[3]; +} + +static inline void kvmppc_set_vscr(struct kvm_vcpu *vcpu, u32 val) +{ + vcpu->arch.vr.vscr.u[3] = val; +} +#endif + +#define KVMPPC_BOOK3S_VCPU_ACCESSOR_SET(reg, size) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ +{ \ + \ + vcpu->arch.reg = val; \ +} + +#define KVMPPC_BOOK3S_VCPU_ACCESSOR_GET(reg, size) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + return vcpu->arch.reg; \ +} + +#define KVMPPC_BOOK3S_VCPU_ACCESSOR(reg, size) \ + KVMPPC_BOOK3S_VCPU_ACCESSOR_SET(reg, size) \ + KVMPPC_BOOK3S_VCPU_ACCESSOR_GET(reg, 
size) \ + +KVMPPC_BOOK3S_VCPU_ACCESSOR(tar, 64) +KVMPPC_BOOK3S_VCPU_ACCESSOR(ebbhr, 64) +KVMPPC_BOOK3S_VCPU_ACCESSOR(ebbrr, 64) +KVMPPC_BOOK3S_VCPU_ACCESSOR(bescr, 64) +KVMPPC_BOOK3S_VCPU_ACCESSOR(ic, 64) +K
[PATCH v3 2/6] KVM: PPC: Rename accessor generator macros
More "wrapper" style accessor generating macros will be introduced for the nestedv2 guest support. Rename the existing macros with more descriptive names now so there is a consistent naming convention. Signed-off-by: Jordan Niethe --- v3: - New to series --- arch/powerpc/include/asm/kvm_ppc.h | 60 +++--- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index d16d80ad2ae4..b66084a81dd0 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -927,19 +927,19 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } -#define SPRNG_WRAPPER_GET(reg, bookehv_spr)\ +#define KVMPPC_BOOKE_HV_SPRNG_ACESSOR_GET(reg, bookehv_spr)\ static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu)\ { \ return mfspr(bookehv_spr); \ } \ -#define SPRNG_WRAPPER_SET(reg, bookehv_spr)\ +#define KVMPPC_BOOKE_HV_SPRNG_ACESSOR_SET(reg, bookehv_spr)\ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ { \ mtspr(bookehv_spr, val); \ } \ -#define SHARED_WRAPPER_GET(reg, size) \ +#define KVMPPC_VCPU_SHARED_REGS_ACESSOR_GET(reg, size) \ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ @@ -948,7 +948,7 @@ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ return le##size##_to_cpu(vcpu->arch.shared->reg);\ } \ -#define SHARED_WRAPPER_SET(reg, size) \ +#define KVMPPC_VCPU_SHARED_REGS_ACESSOR_SET(reg, size) \ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ @@ -957,36 +957,36 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ vcpu->arch.shared->reg = cpu_to_le##size(val); \ } \ -#define SHARED_WRAPPER(reg, size) \ - SHARED_WRAPPER_GET(reg, size) \ - SHARED_WRAPPER_SET(reg, size) \ +#define KVMPPC_VCPU_SHARED_REGS_ACESSOR(reg, size) \ + KVMPPC_VCPU_SHARED_REGS_ACESSOR_GET(reg, size) \ 
+ KVMPPC_VCPU_SHARED_REGS_ACESSOR_SET(reg, size) \ -#define SPRNG_WRAPPER(reg, bookehv_spr) \ - SPRNG_WRAPPER_GET(reg, bookehv_spr) \ - SPRNG_WRAPPER_SET(reg, bookehv_spr) \ +#define KVMPPC_BOOKE_HV_SPRNG_ACESSOR(reg, bookehv_spr) \ + KVMPPC_BOOKE_HV_SPRNG_ACESSOR_GET(reg, bookehv_spr) \ + KVMPPC_BOOKE_HV_SPRNG_ACESSOR_SET(reg, bookehv_spr) \ #ifdef CONFIG_KVM_BOOKE_HV -#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ - SPRNG_WRAPPER(reg, bookehv_spr) \ +#define KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(reg, size, bookehv_spr) \ + KVMPPC_BOOKE_HV_SPRNG_ACESSOR(reg, bookehv_spr) \ #else -#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ - SHARED_WRAPPER(reg, size) \ +#define KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(reg, size, bookehv_spr) \ + KVMPPC_VCPU_SHARED_REGS_ACESSOR(reg, size) \ #endif -SHARED_WRAPPER(critical, 64) -SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0) -SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1) -SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2) -SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) -SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) -SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) -SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) -SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR) -SHARED_WRAPPER_GET(msr, 64) +KVMPPC_VCPU_SHARED_REGS_ACESSOR(critical, 64)
[PATCH v3 3/6] KVM: PPC: Add helper library for Guest State Buffers
The PAPR "Nestedv2" guest API introduces the concept of a Guest State Buffer for communication about L2 guests between L1 and L0 hosts. In the new API, the L0 manages the L2 on behalf of the L1. This means that if the L1 needs to change L2 state (e.g. GPRs, SPRs, partition table...), it must request the L0 perform the modification. If the nested host needs to read L2 state likewise this request must go through the L0. The Guest State Buffer is a Type-Length-Value style data format defined in the PAPR which assigns all relevant partition state a unique identity. Unlike a typical TLV format the length is redundant as the length of each identity is fixed but is included for checking correctness. A guest state buffer consists of an element count followed by a stream of elements, where elements are composed of an ID number, data length, then the data: Header: <---4 bytes---> ++- | Element Count | Elements... ++- Element: <2 bytes---> <-2 bytes-> <-Length bytes-> ++---++ | Guest State ID | Length | Data | ++---++ Guest State IDs have other attributes defined in the PAPR such as whether they are per thread or per guest, or read-only. Introduce a library for using guest state buffers. This includes support for actions such as creating buffers, adding elements to buffers, reading the value of elements and parsing buffers. This will be used later by the nestedv2 guest support. 
Signed-off-by: Jordan Niethe --- v2: - Add missing #ifdef CONFIG_VSXs - Move files from lib/ to kvm/ - Guard compilation on CONFIG_KVM_BOOK3S_HV_POSSIBLE - Use kunit for guest state buffer tests - Add configuration option for the tests - Use macros for contiguous id ranges like GPRs - Add some missing EXPORTs to functions - HEIR element is a double word not a word v3: - Use EXPORT_SYMBOL_GPL() - Use the kvmppc namespace - Move kvmppc_gsb_reset() out of kvmppc_gsm_fill_info() - Comments for GSID elements - Pass vector elements by reference - Remove generic put and get functions --- arch/powerpc/Kconfig.debug| 12 + arch/powerpc/include/asm/guest-state-buffer.h | 904 ++ arch/powerpc/kvm/Makefile | 3 + arch/powerpc/kvm/guest-state-buffer.c | 571 +++ arch/powerpc/kvm/test-guest-state-buffer.c| 328 +++ 5 files changed, 1818 insertions(+) create mode 100644 arch/powerpc/include/asm/guest-state-buffer.h create mode 100644 arch/powerpc/kvm/guest-state-buffer.c create mode 100644 arch/powerpc/kvm/test-guest-state-buffer.c diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 2a54fadbeaf5..339c3a5f56f1 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -82,6 +82,18 @@ config MSI_BITMAP_SELFTEST bool "Run self-tests of the MSI bitmap code" depends on DEBUG_KERNEL +config GUEST_STATE_BUFFER_TEST + def_tristate n + prompt "Enable Guest State Buffer unit tests" + depends on KUNIT + depends on KVM_BOOK3S_HV_POSSIBLE + default KUNIT_ALL_TESTS + help + The Guest State Buffer is a data format specified in the PAPR. + It is by hcalls to communicate the state of L2 guests between + the L1 and L0 hypervisors. Enable unit tests for the library + used to create and use guest state buffers. 
+ config PPC_IRQ_SOFT_MASK_DEBUG bool "Include extra checks for powerpc irq soft masking" depends on PPC64 diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h new file mode 100644 index ..aaefe1075fc4 --- /dev/null +++ b/arch/powerpc/include/asm/guest-state-buffer.h @@ -0,0 +1,904 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Interface based on include/net/netlink.h + */ +#ifndef _ASM_POWERPC_GUEST_STATE_BUFFER_H +#define _ASM_POWERPC_GUEST_STATE_BUFFER_H + +#include +#include +#include + +/** + * Guest State Buffer Constants + **/ +/* Element without a value and any length */ +#define KVMPPC_GSID_BLANK 0x +/* Size required for the L0's internal VCPU representation */ +#define KVMPPC_GSID_HOST_STATE_SIZE0x0001 + /* Minimum size for the H_GUEST_RUN_VCPU output buffer */ +#define KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE0x0002 + /* "Logical" PVR value as defined in the PAPR */ +#define KVMPPC_GSID_LOGICAL_PVR0x0003 + /* L0 relative timebase offset */ +#define KVMPPC_GSID_TB_OFFSET 0x0004 + /* Partition Scoped Page Table Info */ +#define KVMPPC_GSID_PARTITION_TABLE0x0005 + /* Process Table Info */ +#define KVMPPC_GSID_PROCESS_TABLE
[PATCH v3 4/6] KVM: PPC: Book3s HV: Hold LPIDs in an unsigned long
The LPID register is 32 bits long. The host keeps the lpids for each guest in an unsigned word in struct kvm_arch. Currently, LPIDs are already limited by mmu_lpid_bits and KVM_MAX_NESTED_GUESTS_SHIFT. The nestedv2 API returns a 64 bit "Guest ID" to be used by the L1 host for each L2 guest. This value is used as an lpid, e.g. it is the parameter used by H_RPT_INVALIDATE. To minimize needless special casing it makes sense to keep this "Guest ID" in struct kvm_arch::lpid. This means that struct kvm_arch::lpid is too small so prepare for this and make it an unsigned long. This is not a problem for the KVM-HV and nestedv1 cases as their lpid values are already limited to valid ranges so in those contexts the lpid can be used as an unsigned word safely as needed. In the PAPR, the H_RPT_INVALIDATE pid/lpid parameter is already specified as an unsigned long so change pseries_rpt_invalidate() to match that. Update the callers of pseries_rpt_invalidate() to also take an unsigned long if they take an lpid value. 
Signed-off-by: Jordan Niethe --- v3: - New to series --- arch/powerpc/include/asm/kvm_book3s.h | 10 +- arch/powerpc/include/asm/kvm_book3s_64.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/plpar_wrappers.h | 4 ++-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c| 22 +++--- arch/powerpc/kvm/book3s_hv_nested.c | 4 ++-- arch/powerpc/kvm/book3s_xive.c| 4 ++-- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 1a7e837ea2d5..98d4870ec4b3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -191,14 +191,14 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, bool data, bool iswrite); extern void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, - unsigned int pshift, unsigned int lpid); + unsigned int pshift, unsigned long lpid); extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, unsigned int shift, const struct kvm_memory_slot *memslot, - unsigned int lpid); + unsigned long lpid); extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, unsigned long gpa, - unsigned int lpid); + unsigned long lpid); extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, unsigned long gpa, struct kvm_memory_slot *memslot, @@ -207,7 +207,7 @@ extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, extern int kvmppc_init_vm_radix(struct kvm *kvm); extern void kvmppc_free_radix(struct kvm *kvm); extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, - unsigned int lpid); + unsigned long lpid); extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -300,7 +300,7 @@ void 
kvmhv_nested_exit(void); void kvmhv_vm_nested_init(struct kvm *kvm); long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu); -void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); +void kvmhv_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index d49065af08e9..9fc3ad3990f7 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -624,7 +624,7 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long gpa, unsigned int level, -unsigned long mmu_seq, unsigned int lpid, +unsigned long mmu_seq, unsigned long lpid, unsigned long *rmapp, struct rmap_nested **n_rmap); extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp, struct rmap_nested **n_rmap); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 14ee0dece853..67dd3e749cac 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/
[PATCH v3 5/6] KVM: PPC: Add support for nestedv2 guests
A series of hcalls have been added to the PAPR which allow a regular guest partition to create and manage guest partitions of its own. KVM already had an interface that allowed this on powernv platforms. This existing interface will now be called "nestedv1". The newly added PAPR interface will be called "nestedv2". PHYP will support the nestedv2 interface. At this time the host side of the nestedv2 interface has not been implemented on powernv but there is no technical reason why it could not be added. The nestedv1 interface is still supported. Add support to KVM to utilize these hcalls to enable running nested guests as a pseries guest on PHYP. Overview of the new hcall usage: - L1 and L0 negotiate capabilities with H_GUEST_{G,S}ET_CAPABILITIES() - L1 requests the L0 create a L2 with H_GUEST_CREATE() and receives a handle to use in future hcalls - L1 requests the L0 create a L2 vCPU with H_GUEST_CREATE_VCPU() - L1 sets up the L2 using H_GUEST_SET and the H_GUEST_VCPU_RUN input buffer - L1 requests the L0 runs the L2 vCPU using H_GUEST_VCPU_RUN() - L2 returns to L1 with an exit reason and L1 reads the H_GUEST_VCPU_RUN output buffer populated by the L0 - L1 handles the exit using H_GET_STATE if necessary - L1 reruns L2 vCPU with H_GUEST_VCPU_RUN - L1 frees the L2 in the L0 with H_GUEST_DELETE() Support for the new API is determined by trying H_GUEST_GET_CAPABILITIES. On a successful return, use the nestedv2 interface. Use the vcpu register state setters for tracking modified guest state elements and copy the thread wide values into the H_GUEST_VCPU_RUN input buffer immediately before running a L2. The guest wide elements can not be added to the input buffer so send them with a separate H_GUEST_SET call if necessary. Make the vcpu register getter load the corresponding value from the real host with H_GUEST_GET. To avoid unnecessarily calling H_GUEST_GET, track which values have already been loaded between H_GUEST_VCPU_RUN calls. 
If an element is present in the H_GUEST_VCPU_RUN output buffer it also does not need to be loaded again. Signed-off-by: Vaibhav Jain Signed-off-by: Gautam Menghani Signed-off-by: Kautuk Consul Signed-off-by: Amit Machhiwal Signed-off-by: Jordan Niethe --- v2: - Declare op structs as static - Guatam: Use expressions in switch case with local variables - Do not use the PVR for the LOGICAL PVR ID - Kautuk: Handle emul_inst as now a double word, init correctly - Use new GPR(), etc macros - Amit: Determine PAPR nested capabilities from cpu features v3: - Use EXPORT_SYMBOL_GPL() - Change to kvmhv_nestedv2 namespace - Make kvmhv_enable_nested() return -ENODEV on NESTEDv2 L1 hosts - s/kvmhv_on_papr/kvmhv_is_nestedv2/ - mv book3s_hv_papr.c book3s_hv_nestedv2.c - Handle shared regs without a guest state id in the same wrapper - Vaibhav: Use a static key for API version - Add a positive test for NESTEDv1 - Give the amor a static value - s/struct kvmhv_nestedv2_host/struct kvmhv_nestedv2_io/ - Propagate failure in kvmhv_vcpu_entry_nestedv2() - WARN if getters and setters fail - Progagate failure from kvmhv_nestedv2_parse_output() - Replace delay with sleep in plpar_guest_{create,delete,create_vcpu}() - Amit: Add logical PVR handling - Replace kvmppc_gse_{get,put} with specific version --- arch/powerpc/include/asm/guest-state-buffer.h | 91 ++ arch/powerpc/include/asm/hvcall.h | 30 + arch/powerpc/include/asm/kvm_book3s.h | 136 ++- arch/powerpc/include/asm/kvm_book3s_64.h | 6 + arch/powerpc/include/asm/kvm_host.h | 20 + arch/powerpc/include/asm/kvm_ppc.h| 96 +- arch/powerpc/include/asm/plpar_wrappers.h | 188 arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/book3s_hv.c | 136 ++- arch/powerpc/kvm/book3s_hv.h | 72 +- arch/powerpc/kvm/book3s_hv_nested.c | 38 +- arch/powerpc/kvm/book3s_hv_nestedv2.c | 985 ++ arch/powerpc/kvm/emulate_loadstore.c | 4 +- arch/powerpc/kvm/guest-state-buffer.c | 50 + 14 files changed, 1757 insertions(+), 96 deletions(-) create mode 100644 
arch/powerpc/kvm/book3s_hv_nestedv2.c diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h index aaefe1075fc4..808149f31576 100644 --- a/arch/powerpc/include/asm/guest-state-buffer.h +++ b/arch/powerpc/include/asm/guest-state-buffer.h @@ -5,6 +5,7 @@ #ifndef _ASM_POWERPC_GUEST_STATE_BUFFER_H #define _ASM_POWERPC_GUEST_STATE_BUFFER_H +#include "asm/hvcall.h" #include #include #include @@ -313,6 +314,8 @@ struct kvmppc_gs_buff *kvmppc_gsb_new(size_t size, unsigned long guest_id, unsigned long vcpu_id, gfp_t flags); void kvmppc_gsb_free(struct kvmppc_gs_buff *gsb); void *kvmppc_gsb_put(struct kvmppc_gs_buff *gsb, size_t size); +int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, un
[PATCH v3 6/6] docs: powerpc: Document nested KVM on POWER
From: Michael Neuling Document support for nested KVM on POWER using the existing API as well as the new PAPR API. This includes the new HCALL interface and how it is used by KVM. Signed-off-by: Michael Neuling Signed-off-by: Jordan Niethe --- v2: - Separated into individual patch v3: - Fix typos --- Documentation/powerpc/index.rst | 1 + Documentation/powerpc/kvm-nested.rst | 636 +++ 2 files changed, 637 insertions(+) create mode 100644 Documentation/powerpc/kvm-nested.rst diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index d33b554ca7ba..23e449994c2a 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -26,6 +26,7 @@ powerpc isa-versions kaslr-booke32 mpc52xx +kvm-nested papr_hcalls pci_iov_resource_on_powernv pmu-ebb diff --git a/Documentation/powerpc/kvm-nested.rst b/Documentation/powerpc/kvm-nested.rst new file mode 100644 index ..8b37981dc3d9 --- /dev/null +++ b/Documentation/powerpc/kvm-nested.rst @@ -0,0 +1,636 @@ +.. SPDX-License-Identifier: GPL-2.0 + + +Nested KVM on POWER + + +Introduction + + +This document explains how a guest operating system can act as a +hypervisor and run nested guests through the use of hypercalls, if the +hypervisor has implemented them. The terms L0, L1, and L2 are used to +refer to different software entities. L0 is the hypervisor mode entity +that would normally be called the "host" or "hypervisor". L1 is a +guest virtual machine that is directly run under L0 and is initiated +and controlled by L0. L2 is a guest virtual machine that is initiated +and controlled by L1 acting as a hypervisor. 
+ +Existing API + + +Linux/KVM has had support for Nesting as an L0 or L1 since 2018 + +The L0 code was added:: + + commit 8e3f5fc1045dc49fd175b978c5457f5f51e7a2ce + Author: Paul Mackerras + Date: Mon Oct 8 16:31:03 2018 +1100 + KVM: PPC: Book3S HV: Framework and hcall stubs for nested virtualization + +The L1 code was added:: + + commit 360cae313702cdd0b90f82c261a8302fecef030a + Author: Paul Mackerras + Date: Mon Oct 8 16:31:04 2018 +1100 + KVM: PPC: Book3S HV: Nested guest entry via hypercall + +This API works primarily using a single hcall h_enter_nested(). This +call is made by the L1 to tell the L0 to start an L2 vCPU with the given +state. The L0 then starts this L2 and runs until an L2 exit condition +is reached. Once the L2 exits, the state of the L2 is given back to +the L1 by the L0. The full L2 vCPU state is always transferred from +and to L1 when the L2 is run. The L0 doesn't keep any state on the L2 +vCPU (except in the short sequence in the L0 on L1 -> L2 entry and L2 +-> L1 exit). + +The only state kept by the L0 is the partition table. The L1 registers +its partition table using the h_set_partition_table() hcall. All +other state held by the L0 about the L2s is cached state (such as +shadow page tables). + +The L1 may run any L2 or vCPU without first informing the L0. It +simply starts the vCPU using h_enter_nested(). The creation of L2s and +vCPUs is done implicitly whenever h_enter_nested() is called. + +In this document, we call this existing API the v1 API. + +New PAPR API +=== + +The new PAPR API changes from the v1 API such that the creating L2 and +associated vCPUs is explicit. In this document, we call this the v2 +API. + +h_enter_nested() is replaced with H_GUEST_VCPU_RUN(). Before this can +be called the L1 must explicitly create the L2 using h_guest_create() +and any associated vCPUs() created with h_guest_create_vCPU(). Getting +and setting vCPU state can also be performed using h_guest_{g|s}et +hcall. 
+ +The basic execution flow for an L1 to create an L2, run it, and +delete it is: + +- L1 and L0 negotiate capabilities with H_GUEST_{G,S}ET_CAPABILITIES() + (normally at L1 boot time). + +- L1 requests the L0 create an L2 with H_GUEST_CREATE() and receives a token + +- L1 requests the L0 create an L2 vCPU with H_GUEST_CREATE_VCPU() + +- L1 and L0 communicate the vCPU state using the H_GUEST_{G,S}ET() hcall + +- L1 requests the L0 runs the vCPU running H_GUEST_VCPU_RUN() hcall + +- L1 deletes L2 with H_GUEST_DELETE() + +More details of the individual hcalls follows: + +HCALL Details += + +This documentation is provided to give an overall understanding of the +API. It doesn't aim to provide all the details required to implement +an L1 or L0. Latest version of PAPR can be referred to for more details. + +All these HCALLs are made by the L1 to the L0. + +H_GUEST_GET_CAPABILITIES() +-- + +This is called to get the capabilities of the L0 nested +hypervisor. This includes capabilities such as the CPU versions (eg +POWER9, POWER10) that are supported as L2s:: + + H_GUEST_GET_CAPABILITIES(uint64 flags) + + Parameters: +Input: + flags: Reserv
Re: [PATCH v3 10/13] mm/khugepaged: collapse_pte_mapped_thp() with mmap_read_lock()
On 2023/8/6 11:55, Hugh Dickins wrote: On Thu, 3 Aug 2023, Qi Zheng wrote: On 2023/7/12 12:42, Hugh Dickins wrote: Bring collapse_and_free_pmd() back into collapse_pte_mapped_thp(). It does need mmap_read_lock(), but it does not need mmap_write_lock(), nor vma_start_write() nor i_mmap lock nor anon_vma lock. All racing paths are relying on pte_offset_map_lock() and pmd_lock(), so use those. ... @@ -1681,47 +1634,76 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (pte_none(ptent)) continue; - page = vm_normal_page(vma, addr, ptent); - if (WARN_ON_ONCE(page && is_zone_device_page(page))) + /* +* We dropped ptl after the first scan, to do the mmu_notifier: +* page lock stops more PTEs of the hpage being faulted in, but +* does not stop write faults COWing anon copies from existing +* PTEs; and does not stop those being swapped out or migrated. +*/ + if (!pte_present(ptent)) { + result = SCAN_PTE_NON_PRESENT; goto abort; + } + page = vm_normal_page(vma, addr, ptent); + if (hpage + i != page) + goto abort; + + /* +* Must clear entry, or a racing truncate may re-remove it. +* TLB flush can be left until pmdp_collapse_flush() does it. +* PTE dirty? Shmem page is already dirty; file is read-only. +*/ + pte_clear(mm, addr, pte); This is not non-present PTE entry, so we should call ptep_clear() to let page_table_check track the PTE clearing operation, right? Otherwise it may lead to false positives? You are right: thanks a lot for catching that: fix patch follows. With fix patch: Reviewed-by: Qi Zheng Thanks. Hugh
[PATCH 2/2] ocxl: use pci_find_next_dvsec_capability() to simplify the code
PCI core add pci_find_next_dvsec_capability() to query the next DVSEC. We can use that core API to simplify the code. Also remove the unused macros. Signed-off-by: Xiongfeng Wang --- arch/powerpc/platforms/powernv/ocxl.c | 20 ++-- drivers/misc/ocxl/config.c| 21 ++--- include/misc/ocxl-config.h| 4 3 files changed, 8 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 629067781cec..8dbc1a9535fc 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -71,29 +71,13 @@ static DEFINE_MUTEX(links_list_lock); * the AFUs, by pro-rating if needed. */ -static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) -{ - int vsec = pos; - u16 vendor, id; - - while ((vsec = pci_find_next_ext_capability(dev, vsec, - OCXL_EXT_CAP_ID_DVSEC))) { - pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, - &vendor); - pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); - if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) - return vsec; - } - return 0; -} - static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) { int vsec = 0; u8 idx; - while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, - vsec))) { + while ((vsec = pci_find_next_dvsec_capability(dev, vsec, + PCI_VENDOR_ID_IBM, OCXL_DVSEC_AFU_CTRL_ID))) { pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, &idx); if (idx == afu_idx) diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c index 92ab49705f64..6c0fca32e6db 100644 --- a/drivers/misc/ocxl/config.c +++ b/drivers/misc/ocxl/config.c @@ -39,23 +39,14 @@ static int find_dvsec(struct pci_dev *dev, int dvsec_id) static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) { int vsec = 0; - u16 vendor, id; u8 idx; - while ((vsec = pci_find_next_ext_capability(dev, vsec, - OCXL_EXT_CAP_ID_DVSEC))) { - pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, - &vendor); - 
pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); - - if (vendor == PCI_VENDOR_ID_IBM && - id == OCXL_DVSEC_AFU_CTRL_ID) { - pci_read_config_byte(dev, - vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, - &idx); - if (idx == afu_idx) - return vsec; - } + while ((vsec = pci_find_next_dvsec_capability(dev, vsec, + PCI_VENDOR_ID_IBM, OCXL_DVSEC_AFU_CTRL_ID))) { + pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, +&idx); + if (idx == afu_idx) + return vsec; } return 0; } diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h index ccfd3b463517..40cf1b143170 100644 --- a/include/misc/ocxl-config.h +++ b/include/misc/ocxl-config.h @@ -10,10 +10,6 @@ * It follows the specification for opencapi 3.0 */ -#define OCXL_EXT_CAP_ID_DVSEC 0x23 - -#define OCXL_DVSEC_VENDOR_OFFSET 0x4 -#define OCXL_DVSEC_ID_OFFSET 0x8 #define OCXL_DVSEC_TL_ID 0xF000 #define OCXL_DVSEC_TL_BACKOFF_TIMERS 0x10 #define OCXL_DVSEC_TL_RECV_CAP0x18 -- 2.20.1
[PATCH 1/2] PCI: Add pci_find_next_dvsec_capability to find next designated VSEC
Some devices may have several DVSEC(Designated Vendor-Specific Extended Capability) entries with the same DVSEC ID. Add pci_find_next_dvsec_capability() to find them all. Signed-off-by: Xiongfeng Wang --- drivers/pci/pci.c | 37 + include/linux/pci.h | 2 ++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 60230da957e0..3455ca7306ae 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -749,35 +749,48 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap) EXPORT_SYMBOL_GPL(pci_find_vsec_capability); /** - * pci_find_dvsec_capability - Find DVSEC for vendor + * pci_find_next_dvsec_capability - Find next DVSEC for vendor * @dev: PCI device to query + * @start: address at which to start looking (0 to start at beginning of list) * @vendor: Vendor ID to match for the DVSEC * @dvsec: Designated Vendor-specific capability ID * - * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability - * offset in config space; otherwise return 0. + * Returns the address of the next DVSEC if the DVSEC has Vendor ID @vendor and + * DVSEC ID @dvsec; otherwise return 0. DVSEC can occur several times with the + * same DVSEC ID for some devices, and this provides a way to find them all. 
*/ -u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec) +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, u16 vendor, + u16 dvsec) { - int pos; + u16 pos = start; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC); - if (!pos) - return 0; - - while (pos) { + while ((pos = pci_find_next_ext_capability(dev, pos, + PCI_EXT_CAP_ID_DVSEC))) { u16 v, id; pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v); pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id); if (vendor == v && dvsec == id) return pos; - - pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC); } return 0; } +EXPORT_SYMBOL_GPL(pci_find_next_dvsec_capability); + +/** + * pci_find_dvsec_capability - Find DVSEC for vendor + * @dev: PCI device to query + * @vendor: Vendor ID to match for the DVSEC + * @dvsec: Designated Vendor-specific capability ID + * + * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability + * offset in config space; otherwise return 0. + */ +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec) +{ + return pci_find_next_dvsec_capability(dev, 0, vendor, dvsec); +} EXPORT_SYMBOL_GPL(pci_find_dvsec_capability); /** diff --git a/include/linux/pci.h b/include/linux/pci.h index c69a2cc1f412..82bb905daf72 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1168,6 +1168,8 @@ u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap); u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec); +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, u16 vendor, + u16 dvsec); u64 pci_get_dsn(struct pci_dev *dev); -- 2.20.1
[PATCH 0/2] introduce pci_find_next_dvsec_capability() to simplify the code
Some devices may have several DVSEC(Designated Vendor-Specific Extended Capability) entries with the same DVSEC ID. Introduce pci_find_next_dvsec_capability() to simplify the code. Xiongfeng Wang (2): PCI: Add pci_find_next_dvsec_capability to find next designated VSEC ocxl: use pci_find_next_dvsec_capability() to simplify the code arch/powerpc/platforms/powernv/ocxl.c | 20 ++- drivers/misc/ocxl/config.c| 21 +-- drivers/pci/pci.c | 37 ++- include/linux/pci.h | 2 ++ include/misc/ocxl-config.h| 4 --- 5 files changed, 35 insertions(+), 49 deletions(-) -- 2.20.1
Re: [RFC PATCH] cxl: Use pci_find_vsec_capability() to simplify the code
On Fri, 2023-08-04 at 15:56 +0800, Xiongfeng Wang wrote: > PCI core add pci_find_vsec_capability() to query VSEC. We can use > that > core API to simplify the code. > > The only logical change is that pci_find_vsec_capability check the > Vendor ID before finding the VSEC. > > PCI spec rev 5.0 says in 7.9.5.2 Vendor-Specific Header: > VSEC ID - This field is a vendor-defined ID number that indicates > the > nature and format of the VSEC structure > Software must qualify the Vendor ID before interpreting this field. > > Signed-off-by: Xiongfeng Wang LGTM The cxl driver doesn't currently bind to any devices that don't have an IBM vendor ID, and it's very unlikely to in future. If that ever changes, this will of course need to be updated accordingly. Reviewed-by: Andrew Donnellan > --- > drivers/misc/cxl/pci.c | 12 ++-- > 1 file changed, 2 insertions(+), 10 deletions(-) > > diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c > index 0ff944860dda..f3108977755d 100644 > --- a/drivers/misc/cxl/pci.c > +++ b/drivers/misc/cxl/pci.c > @@ -150,16 +150,8 @@ static inline resource_size_t p2_size(struct > pci_dev *dev) > > static int find_cxl_vsec(struct pci_dev *dev) > { > - int vsec = 0; > - u16 val; > - > - while ((vsec = pci_find_next_ext_capability(dev, vsec, > PCI_EXT_CAP_ID_VNDR))) { > - pci_read_config_word(dev, vsec + 0x4, &val); > - if (val == CXL_PCI_VSEC_ID) > - return vsec; > - } > - return 0; > - > + return pci_find_vsec_capability(dev, PCI_VENDOR_ID_IBM, > + CXL_PCI_VSEC_ID); > } > > static void dump_cxl_config_space(struct pci_dev *dev) -- Andrew DonnellanOzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
Re: [PATCH 1/2] PCI: Add pci_find_next_dvsec_capability to find next designated VSEC
On Mon, 2023-08-07 at 11:18 +0800, Xiongfeng Wang wrote: > Some devices may have several DVSEC(Designated Vendor-Specific > Extended > Capability) entries with the same DVSEC ID. Add > pci_find_next_dvsec_capability() to find them all. > > Signed-off-by: Xiongfeng Wang > Reviewed-by: Andrew Donnellan > --- > drivers/pci/pci.c | 37 + > include/linux/pci.h | 2 ++ > 2 files changed, 27 insertions(+), 12 deletions(-) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index 60230da957e0..3455ca7306ae 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -749,35 +749,48 @@ u16 pci_find_vsec_capability(struct pci_dev > *dev, u16 vendor, int cap) > EXPORT_SYMBOL_GPL(pci_find_vsec_capability); > > /** > - * pci_find_dvsec_capability - Find DVSEC for vendor > + * pci_find_next_dvsec_capability - Find next DVSEC for vendor > * @dev: PCI device to query > + * @start: address at which to start looking (0 to start at > beginning of list) > * @vendor: Vendor ID to match for the DVSEC > * @dvsec: Designated Vendor-specific capability ID > * > - * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the > capability > - * offset in config space; otherwise return 0. > + * Returns the address of the next DVSEC if the DVSEC has Vendor ID > @vendor and > + * DVSEC ID @dvsec; otherwise return 0. DVSEC can occur several > times with the > + * same DVSEC ID for some devices, and this provides a way to find > them all. 
> */ > -u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 > dvsec) > +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, > u16 vendor, > + u16 dvsec) > { > - int pos; > + u16 pos = start; > > - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC); > - if (!pos) > - return 0; > - > - while (pos) { > + while ((pos = pci_find_next_ext_capability(dev, pos, > + > PCI_EXT_CAP_ID_DVSEC))) { > u16 v, id; > > pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, > &v); > pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, > &id); > if (vendor == v && dvsec == id) > return pos; > - > - pos = pci_find_next_ext_capability(dev, pos, > PCI_EXT_CAP_ID_DVSEC); > } > > return 0; > } > +EXPORT_SYMBOL_GPL(pci_find_next_dvsec_capability); > + > +/** > + * pci_find_dvsec_capability - Find DVSEC for vendor > + * @dev: PCI device to query > + * @vendor: Vendor ID to match for the DVSEC > + * @dvsec: Designated Vendor-specific capability ID > + * > + * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the > capability > + * offset in config space; otherwise return 0. 
> + */ > +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 > dvsec) > +{ > + return pci_find_next_dvsec_capability(dev, 0, vendor, dvsec); > +} > EXPORT_SYMBOL_GPL(pci_find_dvsec_capability); > > /** > diff --git a/include/linux/pci.h b/include/linux/pci.h > index c69a2cc1f412..82bb905daf72 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -1168,6 +1168,8 @@ u16 pci_find_next_ext_capability(struct pci_dev > *dev, u16 pos, int cap); > struct pci_bus *pci_find_next_bus(const struct pci_bus *from); > u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int > cap); > u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 > dvsec); > +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, > u16 vendor, > + u16 dvsec); > > u64 pci_get_dsn(struct pci_dev *dev); > -- Andrew DonnellanOzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
[PATCH] perf test: Fix parse-events tests to skip parametrized events
Testcase "Parsing of all PMU events from sysfs" parses events for all PMUs, and not just cpu. In case of powerpc, the PowerVM environment supports events from hv_24x7 and hv_gpci PMU which is of example format like below: - hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/ - hv_gpci/event,partition_id=?/ The value for "?" needs to be filled in depending on system configuration. It is better to skip these parametrized events in this test as it is done in: 'commit b50d691e50e6 ("perf test: Fix "all PMU test" to skip parametrized events")' which handled a similar instance with "all PMU test". Fix parse-events test to skip parametrized events since it needs proper setup of the parameters. Signed-off-by: Athira Rajeev --- tools/perf/tests/parse-events.c | 32 1 file changed, 32 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index b2f82847e4c3..605373c7d005 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2504,7 +2504,11 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest while ((pmu = perf_pmus__scan(pmu)) != NULL) { struct stat st; char path[PATH_MAX]; + char pmu_event[PATH_MAX + 256]; + char *buf = NULL; + FILE *file; struct dirent *ent; + size_t len = 0; DIR *dir; int err; @@ -2528,11 +2532,39 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest struct evlist_test e = { .name = NULL, }; char name[2 * NAME_MAX + 1 + 12 + 3]; int test_ret; + int skip = 0; /* Names containing . 
are special and cannot be used directly */ if (strchr(ent->d_name, '.')) continue; + /* exclude parametrized ones (name contains '?') */ + snprintf(pmu_event, PATH_MAX + 256, "%s%s", path, ent->d_name); + file = fopen(pmu_event, "r"); + if (!file) { + pr_debug("can't open pmu event file for '%s'\n", ent->d_name); + ret = combine_test_results(ret, TEST_FAIL); + continue; + } + + if (getline(&buf, &len, file) < 0) { + pr_debug(" pmu event: %s is a null event\n", ent->d_name); + ret = combine_test_results(ret, TEST_FAIL); + continue; + } + + if (strchr(buf, '?')) + skip = 1; + + free(buf); + buf = NULL; + fclose(file); + + if (skip == 1) { + pr_debug("skipping parametrized PMU event: %s which contains ?\n", pmu_event); + continue; + } + snprintf(name, sizeof(name), "%s/event=%s/u", pmu->name, ent->d_name); e.name = name; -- 2.31.1
[PATCH] tools/perf: Fix bpf__probe to set bpf_prog_type type only if differs from the desired one
The test "BPF prologue generation" fails as below: Writing event: p:perf_bpf_probe/func _text+10423200 f_mode=+20(%gpr3):x32 offset=%gpr4:s64 orig=%gpr5:s32 In map_prologue, ntevs=1 mapping[0]=0 libbpf: prog 'bpf_func__null_lseek': BPF program load failed: Permission denied libbpf: prog 'bpf_func__null_lseek': -- BEGIN PROG LOAD LOG -- btf_vmlinux is malformed reg type unsupported for arg#0 function bpf_func__null_lseek#5 0: R1=ctx(off=0,imm=0) R10=fp0 ; 0: (57) r3 &= 2 R3 !read_ok processed 1 insns (limit 100) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 -- END PROG LOAD LOG -- libbpf: prog 'bpf_func__null_lseek': failed to load: -13 libbpf: failed to load object '[bpf_prologue_test]' bpf: load objects failed: err=-13: (Permission denied) Failed to add events selected by BPF This failure occurs after this commit: commit d6e6286a12e7 ("libbpf: disassociate section handler on explicit bpf_program__set_type() call") With this change, the SEC_DEF handler in libbpf, which is determined initially based on the program's SEC(), is set to NULL. The change is made because sec_def is not valid when user sets the program type with bpf_program__set_type function. This commit also fixed bpf_prog_test_load() helper in selftests/bpf to force-set program type only if it differs from the desired one. The "bpf__probe" function in util/bpf-loader.c, also calls bpf_program__set_type to set bpf_prog_type. Add a similar fix here as well to avoid setting sec_def to NULL. 
Reported-by: Sachin Sant Signed-off-by: Athira Rajeev --- tools/perf/util/bpf-loader.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 44cde27d6389..b8e0b430e302 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -870,12 +870,14 @@ int bpf__probe(struct bpf_object *obj) goto out; } - if (priv->is_tp) { + if (priv->is_tp && bpf_program__type(prog) != BPF_PROG_TYPE_TRACEPOINT) { bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); continue; } - bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE); + if (bpf_program__type(prog) != BPF_PROG_TYPE_KPROBE) + bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE); + pev = &priv->pev; err = convert_perf_probe_events(pev, 1); -- 2.39.3
Re: [PATCH 2/2] ocxl: use pci_find_next_dvsec_capability() to simplify the code
On Mon, 2023-08-07 at 11:18 +0800, Xiongfeng Wang wrote: > PCI core add pci_find_next_dvsec_capability() to query the next > DVSEC. > We can use that core API to simplify the code. Also remove the unused > macros. > > Signed-off-by: Xiongfeng Wang Reviewed-by: Andrew Donnellan -- Andrew DonnellanOzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
Re: [PATCH 1/1] perf tests task_analyzer: Check perf build options for libtraceevent support
On Thu, Aug 03, 2023 at 12:52:50PM +0530, Athira Rajeev wrote: > > > > On 03-Aug-2023, at 8:33 AM, Aditya Gupta wrote: > > > > Hi Arnaldo, > > I am working on a patch for 'perf version --has', and will send a patch next > > Hi Aditya, > > I believe, it will be “perf build --has” option. And not “perf version --has” ? Oh okay. I misread. Thanks for pointing it out Athira. Thanks, Aditya G
Re: [PATCH] tools/perf: Fix bpf__probe to set bpf_prog_type type only if differs from the desired one
> On 07-Aug-2023, at 10:22 AM, Athira Rajeev > wrote: > > The test "BPF prologue generation" fails as below: > > Writing event: p:perf_bpf_probe/func _text+10423200 f_mode=+20(%gpr3):x32 > offset=%gpr4:s64 orig=%gpr5:s32 > In map_prologue, ntevs=1 > mapping[0]=0 > libbpf: prog 'bpf_func__null_lseek': BPF program load failed: Permission > denied > libbpf: prog 'bpf_func__null_lseek': -- BEGIN PROG LOAD LOG -- > btf_vmlinux is malformed > reg type unsupported for arg#0 function bpf_func__null_lseek#5 > 0: R1=ctx(off=0,imm=0) R10=fp0 > ; > 0: (57) r3 &= 2 > R3 !read_ok > processed 1 insns (limit 100) max_states_per_insn 0 total_states 0 > peak_states 0 mark_read 0 > -- END PROG LOAD LOG -- > libbpf: prog 'bpf_func__null_lseek': failed to load: -13 > libbpf: failed to load object '[bpf_prologue_test]' > bpf: load objects failed: err=-13: (Permission denied) > Failed to add events selected by BPF > > This fails occurs after this commit: > commit d6e6286a12e7 ("libbpf: disassociate section handler > on explicit bpf_program__set_type() call")' > > With this change, SEC_DEF handler libbpf which is determined > initially based on program's SEC() is set to NULL. The change > is made because sec_def is not valid when user sets the program > type with bpf_program__set_type function. This commit also fixed > bpf_prog_test_load() helper in selftests/bpf to force-set program > type only if it differs from the desired one. > > The "bpf__probe" function in util/bpf-loader.c, also calls > bpf_program__set_type to set bpf_prog_type. Add similar fix in > here as well to avoid setting sec_def to NULL. > > Reported-by: Sachin Sant > Signed-off-by: Athira Rajeev > --- Thanks Athira for the fix. With this patch applied perf BPF prologue sub test works correctly. 42: BPF filter : 42.1: Basic BPF filtering: Ok 42.2: BPF pinning : Ok 42.3: BPF prologue generation : Ok Tested-by: Sachin Sant Can you please use the above mentioned id(without vnet) in the reported-by ? - Sachin
Re: [PATCH] perf test: Fix parse-events tests to skip parametrized events
> On 07-Aug-2023, at 10:20 AM, Athira Rajeev > wrote: > > Testcase "Parsing of all PMU events from sysfs" parse events for > all PMUs, and not just cpu. In case of powerpc, the PowerVM > environment supports events from hv_24x7 and hv_gpci PMU which > is of example format like below: > > - hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/ > - hv_gpci/event,partition_id=?/ > > The value for "?" needs to be filled in depending on system > configuration. It is better to skip these parametrized events > in this test as it is done in: > 'commit b50d691e50e6 ("perf test: Fix "all PMU test" to skip > parametrized events")' which handled a similar instance with > "all PMU test". > > Fix parse-events test to skip parametrized events since > it needs proper setup of the parameters. > > Signed-off-by: Athira Rajeev > — Thanks Athira for the fix. With this fix applied the reported problem is fixed. 6.1: Test event parsing: Ok 6.2: Parsing of all PMU events from sysfs : Ok 6.3: Parsing of given PMU events from sysfs: Ok Tested-by: Sachin Sant - Sachin
[PATCH v8 1/2] powerpc/rtas: Rename rtas_error_rc to rtas_generic_errno
rtas_generic_errno() function will convert the generic rtas return codes into errno. Also, #define descriptive names for rtas return codes and use it instead of numeric values. Signed-off-by: Mahesh Salgaonkar --- (no changes since v7) Change in V7: - Until v6 there was only one patch with subject "PCI hotplug: rpaphp: Error out on busy status from get-sensor-state". Starting from v7, adding this new patch to introduce rtas_generic_errno() to handle generic rtas error codes. https://lore.kernel.org/all/20220429162545.GA79541@bhelgaas/ --- arch/powerpc/include/asm/rtas.h | 10 +++ arch/powerpc/kernel/rtas.c | 53 --- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3abe15ac79db1..5572a0a2f6e18 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -202,7 +202,9 @@ typedef struct { #define RTAS_USER_REGION_SIZE (64 * 1024) /* RTAS return status codes */ -#define RTAS_BUSY -2/* RTAS Busy */ +#define RTAS_HARDWARE_ERROR(-1) /* Hardware Error */ +#define RTAS_BUSY (-2) /* RTAS Busy */ +#define RTAS_INVALID_PARAMETER (-3) /* Invalid indicator/domain/sensor etc. 
*/ #define RTAS_EXTENDED_DELAY_MIN9900 #define RTAS_EXTENDED_DELAY_MAX9905 @@ -212,6 +214,11 @@ typedef struct { #define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ #define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ +/* statuses specific to get-sensor-state */ +#define RTAS_SLOT_UNISOLATED (-9000) +#define RTAS_SLOT_NOT_UNISOLATED (-9001) +#define RTAS_SLOT_NOT_USABLE (-9002) + /* RTAS event classes */ #define RTAS_INTERNAL_ERROR0x8000 /* set bit 0 */ #define RTAS_EPOW_WARNING 0x4000 /* set bit 1 */ @@ -425,6 +432,7 @@ extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); int rtas_ibm_suspend_me(int *fw_status); +int rtas_generic_errno(int rtas_rc); struct rtc_time; extern time64_t rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c087320ff..80b6099e8ce20 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1330,33 +1330,34 @@ bool __ref rtas_busy_delay(int status) } EXPORT_SYMBOL_GPL(rtas_busy_delay); -static int rtas_error_rc(int rtas_rc) +int rtas_generic_errno(int rtas_rc) { int rc; switch (rtas_rc) { - case -1:/* Hardware Error */ - rc = -EIO; - break; - case -3:/* Bad indicator/domain/etc */ - rc = -EINVAL; - break; - case -9000: /* Isolation error */ - rc = -EFAULT; - break; - case -9001: /* Outstanding TCE/PTE */ - rc = -EEXIST; - break; - case -9002: /* No usable slot */ - rc = -ENODEV; - break; - default: - pr_err("%s: unexpected error %d\n", __func__, rtas_rc); - rc = -ERANGE; - break; + case RTAS_HARDWARE_ERROR: /* Hardware Error */ + rc = -EIO; + break; + case RTAS_INVALID_PARAMETER:/* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case RTAS_SLOT_UNISOLATED: /* Isolation error */ + rc = -EFAULT; + break; + case RTAS_SLOT_NOT_UNISOLATED: /* Outstanding TCE/PTE */ + rc = 
-EEXIST; + break; + case RTAS_SLOT_NOT_USABLE: /* No usable slot */ + rc = -ENODEV; + break; + default: + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); + rc = -ERANGE; + break; } return rc; } +EXPORT_SYMBOL(rtas_generic_errno); int rtas_get_power_level(int powerdomain, int *level) { @@ -1370,7 +1371,7 @@ int rtas_get_power_level(int powerdomain, int *level) udelay(1); if (rc < 0) - return rtas_error_rc(rc); + return rtas_generic_errno(rc); return rc; } EXPORT_SYMBOL_GPL(rtas_get_power_level); @@ -1388,7 +1389,7 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel) } while (rtas_busy_delay(rc)); if (rc < 0) - return rtas_error_rc(rc); + return rtas_generic_errno(rc); return rc; } EXPORT_SYMBOL_GPL(rtas_set_power_level); @@ -1406,7
[PATCH v8 2/2] PCI: rpaphp: Error out on busy status from get-sensor-state
When certain PHB HW failure causes pHyp to recover PHB, it marks the PE state as temporarily unavailable until recovery is complete. This also triggers an EEH handler in Linux which needs to notify drivers, and perform recovery. But before notifying the driver about the PCI error it uses get_adapter_status()->rpaphp_get_sensor_state()->rtas_call(get-sensor-state) operation of the hotplug_slot to determine if the slot contains a device or not. If the slot is empty, the recovery is skipped entirely. eeh_event_handler() ->eeh_handle_normal_event() ->eeh_slot_presence_check() ->get_adapter_status() ->rpaphp_get_sensor_state() ->rtas_get_sensor() ->rtas_call(get-sensor-state) However on certain PHB failures, the RTAS call rtas_call(get-sensor-state) returns extended busy error (9902) until PHB is recovered by pHyp. Once PHB is recovered, the rtas_call(get-sensor-state) returns success with correct presence status. The RTAS call interface rtas_get_sensor() loops over the RTAS call on extended delay return code (9902) until the return value is either success (0) or error (-1). This causes the EEH handler to get stuck for ~6 seconds before it could notify that the PCI error has been detected and stop any active operations. Hence with running I/O traffic, during this 6 seconds, the network driver continues its operation and hits a timeout (netdev watchdog). [52732.244731] DEBUG: ibm_read_slot_reset_state2() [52732.244762] DEBUG: ret = 0, rets[0]=5, rets[1]=1, rets[2]=4000, rets[3]=> [52732.244798] DEBUG: in eeh_slot_presence_check [52732.244804] DEBUG: error state check [52732.244807] DEBUG: Is slot hotpluggable [52732.244810] DEBUG: hotpluggable ops ? [52732.244953] DEBUG: Calling ops->get_adapter_status [52732.244958] DEBUG: calling rpaphp_get_sensor_state [52736.564262] [ cut here ] [52736.564299] NETDEV WATCHDOG: enP64p1s0f3 (tg3): transmit queue 0 timed o> [52736.564324] WARNING: CPU: 1442 PID: 0 at net/sched/sch_generic.c:478 dev> [...] 
[52736.564505] NIP [c0c32368] dev_watchdog+0x438/0x440 [52736.564513] LR [c0c32364] dev_watchdog+0x434/0x440 On timeouts, the network driver starts dumping debug information to the console (e.g. the bnx2x driver calls bnx2x_panic_dump()) and goes into the recovery path while pHyp is still recovering the PHB. As part of recovery, the driver tries to reset the device and it keeps failing since every PCI read/write returns ff's. And when EEH recovery kicks in, the driver is unable to recover the device. This impacts the ssh connection and leads to the system being inaccessible. To get the NIC working again, the system needs a reboot or the I/O adapter must be re-assigned from the HMC. [ 9531.168587] EEH: Beginning: 'slot_reset' [ 9531.168601] PCI 0013:01:00.0#1: EEH: Invoking bnx2x->slot_reset() [...] [ 9614.110094] bnx2x: [bnx2x_func_stop:9129(enP19p1s0f0)]FUNC_STOP ramrod failed. Running a dry transaction [ 9614.110300] bnx2x: [bnx2x_igu_int_disable:902(enP19p1s0f0)]BUG! Proper val not read from IGU! [ 9629.178067] bnx2x: [bnx2x_fw_command:3055(enP19p1s0f0)]FW failed to respond! [ 9629.178085] bnx2x 0013:01:00.0 enP19p1s0f0: bc 7.10.4 [ 9629.178091] bnx2x: [bnx2x_fw_dump_lvl:789(enP19p1s0f0)]Cannot dump MCP info while in PCI error [ 9644.241813] bnx2x: [bnx2x_io_slot_reset:14245(enP19p1s0f0)]IO slot reset --> driver unload [...] [ 9644.241819] PCI 0013:01:00.0#1: EEH: bnx2x driver reports: 'disconnect' [ 9644.241823] PCI 0013:01:00.1#1: EEH: Invoking bnx2x->slot_reset() [ 9644.241827] bnx2x: [bnx2x_io_slot_reset:14229(enP19p1s0f1)]IO slot reset initializing... [ 9644.241916] bnx2x 0013:01:00.1: enabling device (0140 -> 0142) [ 9644.258604] bnx2x: [bnx2x_io_slot_reset:14245(enP19p1s0f1)]IO slot reset --> driver unload [ 9644.258612] PCI 0013:01:00.1#1: EEH: bnx2x driver reports: 'disconnect' [ 9644.258615] EEH: Finished:'slot_reset' with aggregate recovery state:'disconnect' [ 9644.258620] EEH: Unable to recover from failure from PHB#13-PE#1.
[ 9644.261811] EEH: Beginning: 'error_detected(permanent failure)' [...] [ 9644.261823] EEH: Finished:'error_detected(permanent failure)' Hence, it becomes important to inform driver about the PCI error detection as early as possible, so that driver is aware of PCI error and waits for EEH handler's next action for successful recovery. Current implementation uses rtas_get_sensor() API which blocks the slot check state until RTAS call returns success. To avoid this, fix the PCI hotplug driver (rpaphp) to return an error (-EBUSY) if the slot presence state can not be detected immediately while PE is in EEH recovery state. Change rpaphp_get_sensor_state() to invoke rtas_call(get-sensor-state) directly only if the respective PE is in EEH recovery state, and take actions based on RTAS return status. This way EEH handler will not be blocked on rpaphp_get_sensor_state() and can immediately notify driver about the PCI error and stop any active operati