Re: [PATCH V11 03/17] riscv: Use Zicbop in arch_xchg when available
On Sat, Sep 16, 2023 at 9:25 AM Leonardo Bras wrote: > > On Fri, Sep 15, 2023 at 08:36:31PM +0800, Guo Ren wrote: > > On Wed, Sep 13, 2023 at 4:50 PM Leonardo Bras wrote: > > > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guo...@kernel.org wrote: > > > > From: Guo Ren > > > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > > indicate that software intends to perform a particular type of > > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > > > Signed-off-by: Guo Ren > > > > Signed-off-by: Guo Ren > > > > --- > > > > arch/riscv/Kconfig | 15 +++ > > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > > arch/riscv/include/asm/hwcap.h | 1 + > > > > arch/riscv/include/asm/insn-def.h | 5 + > > > > arch/riscv/include/asm/processor.h | 13 + > > > > arch/riscv/kernel/cpufeature.c | 1 + > > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > > --- a/arch/riscv/Kconfig > > > > +++ b/arch/riscv/Kconfig > > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > > > If you don't know what to do here, say Y. > > > > > > > > +config RISCV_ISA_ZICBOP > > > > + bool "Zicbop extension support for cache block prefetch" > > > > + depends on MMU > > > > + depends on RISCV_ALTERNATIVE > > > > + default y > > > > + help > > > > +Adds support to dynamically detect the presence of the ZICBOP > > > > +extension (Cache Block Prefetch Operations) and enable its > > > > +usage. > > > > + > > > > +The Zicbop extension can be used to prefetch cache block for > > > > +read/write/instruction fetch. > > > > + > > > > +If you don't know what to do here, say Y. 
> > > > + > > > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > > > bool > > > > default y > > > > diff --git a/arch/riscv/include/asm/cmpxchg.h > > > > b/arch/riscv/include/asm/cmpxchg.h > > > > index 702725727671..56eff7a9d2d2 100644 > > > > --- a/arch/riscv/include/asm/cmpxchg.h > > > > +++ b/arch/riscv/include/asm/cmpxchg.h > > > > @@ -11,6 +11,7 @@ > > > > > > > > #include > > > > #include > > > > +#include > > > > > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > > > ({ \ > > > > @@ -25,6 +26,7 @@ > > > > \ > > > > __asm__ __volatile__ ( \ > > > > prepend \ > > > > +PREFETCHW_ASM(%5)\ > > > > "0: lr.w %0, %2\n" \ > > > > "and %1, %0, %z4\n" \ > > > > "or %1, %1, %z3\n" \ > > > > @@ -32,7 +34,7 @@ > > > > "bnez %1, 0b\n" \ > > > > append \ > > > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > > > -: "rJ" (__newx), "rJ" (~__mask) \ > > > > +: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > > > : "memory"); \ > > > > \ > > > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > > > diff --git a/arch/riscv/include/asm/hwcap.h > > > > b/arch/riscv/include/asm/hwcap.h > > > > index b7b58258f6c7..78b7b8b53778 100644 > > > > --- a/arch/riscv/include/asm/hwcap.h > > > > +++ b/arch/riscv/include/asm/hwcap.h > > > > @@ -58,6 +58,7 @@ > > > > #define RISCV_ISA_EXT_ZICSR 40 > > > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > > > #define RISCV_ISA_EXT_ZIHPM 42 > > > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > > > > > #define RISCV_ISA_EXT_MAX64 > > > > > > > > diff --git a/arch/riscv/include/asm/insn-def.h > > > > b/arch/riscv/include/asm/insn-def.h > > > > index 6960beb75f32..dc590d331894 100644 > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > @@ -134,6 +134,7 @@ > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > INSN_R(OPCODE_SYSTEM, 
FUNC3(0), FUNC7(17), \ > > > > @@ -196,4 +197,8 @@ > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),
Re: [PATCH V11 11/17] RISC-V: paravirt: pvqspinlock: Add paravirt qspinlock skeleton
On Fri, Sep 15, 2023 at 1:42 PM Leonardo Bras wrote: > > On Sun, Sep 10, 2023 at 04:29:05AM -0400, guo...@kernel.org wrote: > > From: Guo Ren > > > > Using static_call to switch between: > > native_queued_spin_lock_slowpath()__pv_queued_spin_lock_slowpath() > > native_queued_spin_unlock() __pv_queued_spin_unlock() > > > > Finish the pv_wait implementation, but pv_kick needs the SBI > > definition of the next patches. > > > > Signed-off-by: Guo Ren > > Signed-off-by: Guo Ren > > --- > > arch/riscv/include/asm/Kbuild | 1 - > > arch/riscv/include/asm/qspinlock.h | 35 + > > arch/riscv/include/asm/qspinlock_paravirt.h | 29 +++ > > arch/riscv/include/asm/spinlock.h | 2 +- > > arch/riscv/kernel/qspinlock_paravirt.c | 57 + > > arch/riscv/kernel/setup.c | 4 ++ > > 6 files changed, 126 insertions(+), 2 deletions(-) > > create mode 100644 arch/riscv/include/asm/qspinlock.h > > create mode 100644 arch/riscv/include/asm/qspinlock_paravirt.h > > create mode 100644 arch/riscv/kernel/qspinlock_paravirt.c > > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild > > index a0dc85e4a754..b89cb3b73c13 100644 > > --- a/arch/riscv/include/asm/Kbuild > > +++ b/arch/riscv/include/asm/Kbuild > > @@ -7,6 +7,5 @@ generic-y += parport.h > > generic-y += spinlock_types.h > > generic-y += qrwlock.h > > generic-y += qrwlock_types.h > > -generic-y += qspinlock.h > > generic-y += user.h > > generic-y += vmlinux.lds.h > > diff --git a/arch/riscv/include/asm/qspinlock.h > > b/arch/riscv/include/asm/qspinlock.h > > new file mode 100644 > > index ..7d4f416c908c > > --- /dev/null > > +++ b/arch/riscv/include/asm/qspinlock.h > > @@ -0,0 +1,35 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +/* > > + * Copyright (c), 2023 Alibaba Cloud > > + * Authors: > > + * Guo Ren > > + */ > > + > > +#ifndef _ASM_RISCV_QSPINLOCK_H > > +#define _ASM_RISCV_QSPINLOCK_H > > + > > +#ifdef CONFIG_PARAVIRT_SPINLOCKS > > +#include > > + > > +/* How long a lock should spin before we consider 
blocking */ > > +#define SPIN_THRESHOLD (1 << 15) > > + > > +void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); > > +void __pv_init_lock_hash(void); > > +void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); > > + > > +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 > > val) > > +{ > > + static_call(pv_queued_spin_lock_slowpath)(lock, val); > > +} > > + > > +#define queued_spin_unlock queued_spin_unlock > > +static inline void queued_spin_unlock(struct qspinlock *lock) > > +{ > > + static_call(pv_queued_spin_unlock)(lock); > > +} > > +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ > > + > > +#include > > + > > +#endif /* _ASM_RISCV_QSPINLOCK_H */ > > diff --git a/arch/riscv/include/asm/qspinlock_paravirt.h > > b/arch/riscv/include/asm/qspinlock_paravirt.h > > new file mode 100644 > > index ..9681e851f69d > > --- /dev/null > > +++ b/arch/riscv/include/asm/qspinlock_paravirt.h > > @@ -0,0 +1,29 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +/* > > + * Copyright (c), 2023 Alibaba Cloud > > + * Authors: > > + * Guo Ren > > + */ > > + > > +#ifndef _ASM_RISCV_QSPINLOCK_PARAVIRT_H > > +#define _ASM_RISCV_QSPINLOCK_PARAVIRT_H > > + > > +void pv_wait(u8 *ptr, u8 val); > > +void pv_kick(int cpu); > > + > > +void dummy_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); > > +void dummy_queued_spin_unlock(struct qspinlock *lock); > > + > > +DECLARE_STATIC_CALL(pv_queued_spin_lock_slowpath, > > dummy_queued_spin_lock_slowpath); > > +DECLARE_STATIC_CALL(pv_queued_spin_unlock, dummy_queued_spin_unlock); > > + > > +void __init pv_qspinlock_init(void); > > + > > +static inline bool pv_is_native_spin_unlock(void) > > +{ > > + return false; > > +} > > + > > +void __pv_queued_spin_unlock(struct qspinlock *lock); > > + > > +#endif /* _ASM_RISCV_QSPINLOCK_PARAVIRT_H */ > > diff --git a/arch/riscv/include/asm/spinlock.h > > b/arch/riscv/include/asm/spinlock.h > > index 6b38d6616f14..ed4253f491fe 100644 > > --- 
a/arch/riscv/include/asm/spinlock.h > > +++ b/arch/riscv/include/asm/spinlock.h > > @@ -39,7 +39,7 @@ static inline bool virt_spin_lock(struct qspinlock *lock) > > #undef arch_spin_trylock > > #undef arch_spin_unlock > > > > -#include > > +#include > > #include > > > > #undef arch_spin_is_locked > > diff --git a/arch/riscv/kernel/qspinlock_paravirt.c > > b/arch/riscv/kernel/qspinlock_paravirt.c > > new file mode 100644 > > index ..85ff5a3ec234 > > --- /dev/null > > +++ b/arch/riscv/kernel/qspinlock_paravirt.c > > @@ -0,0 +1,57 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (c), 2023 Alibaba Cloud > > + * Authors: > > + * Guo Ren > > + */ > > + > > +#include > > +#include > > +#include > > + > > +void pv_kick(int cpu) > > +{ > > + return; > > +} > > + > >
Re: [PATCH V11 05/17] riscv: qspinlock: Add basic queued_spinlock support
On Fri, Sep 15, 2023 at 5:08 PM Leonardo Bras wrote: > > On Fri, Sep 15, 2023 at 10:10:25AM +0800, Guo Ren wrote: > > On Thu, Sep 14, 2023 at 5:43 PM Leonardo Bras wrote: > > > > > > On Thu, Sep 14, 2023 at 12:46:56PM +0800, Guo Ren wrote: > > > > On Thu, Sep 14, 2023 at 4:29 AM Leonardo Bras > > > > wrote: > > > > > > > > > > On Sun, Sep 10, 2023 at 04:28:59AM -0400, guo...@kernel.org wrote: > > > > > > From: Guo Ren > > > > > > > > > > > > The requirements of qspinlock have been documented by commit: > > > > > > a8ad07e5240c ("asm-generic: qspinlock: Indicate the use of > > > > > > mixed-size > > > > > > atomics"). > > > > > > > > > > > > Although RISC-V ISA gives out a weaker forward guarantee LR/SC, > > > > > > which > > > > > > doesn't satisfy the requirements of qspinlock above, it won't > > > > > > prevent > > > > > > some riscv vendors from implementing a strong fwd guarantee LR/SC in > > > > > > microarchitecture to match xchg_tail requirement. T-HEAD C9xx > > > > > > processor > > > > > > is the one. > > > > > > > > > > > > We've tested the patch on SOPHGO sg2042 & th1520 and passed the > > > > > > stress > > > > > > test on Fedora & Ubuntu & OpenEuler ... 
Here is the performance > > > > > > comparison between qspinlock and ticket_lock on sg2042 (64 cores): > > > > > > > > > > > > sysbench test=threads threads=32 yields=100 lock=8 (+13.8%): > > > > > > queued_spinlock 0.5109/0.00 > > > > > > ticket_spinlock 0.5814/0.00 > > > > > > > > > > > > perf futex/hash (+6.7%): > > > > > > queued_spinlock 1444393 operations/sec (+- 0.09%) > > > > > > ticket_spinlock 1353215 operations/sec (+- 0.15%) > > > > > > > > > > > > perf futex/wake-parallel (+8.6%): > > > > > > queued_spinlock (waking 1/64 threads) in 0.0253 ms (+-2.90%) > > > > > > ticket_spinlock (waking 1/64 threads) in 0.0275 ms (+-3.12%) > > > > > > > > > > > > perf futex/requeue (+4.2%): > > > > > > queued_spinlock Requeued 64 of 64 threads in 0.0785 ms (+-0.55%) > > > > > > ticket_spinlock Requeued 64 of 64 threads in 0.0818 ms (+-4.12%) > > > > > > > > > > > > System Benchmarks (+6.4%) > > > > > > queued_spinlock: > > > > > > System Benchmarks Index Values BASELINE > > > > > > RESULTINDEX > > > > > > Dhrystone 2 using register variables 116700.0 > > > > > > 628613745.4 53865.8 > > > > > > Double-Precision Whetstone 55.0 > > > > > > 182422.8 33167.8 > > > > > > Execl Throughput 43.0 > > > > > > 13116.6 3050.4 > > > > > > File Copy 1024 bufsize 2000 maxblocks 3960.0 > > > > > > 7762306.2 19601.8 > > > > > > File Copy 256 bufsize 500 maxblocks1655.0 > > > > > > 3417556.8 20649.9 > > > > > > File Copy 4096 bufsize 8000 maxblocks 5800.0 > > > > > > 7427995.7 12806.9 > > > > > > Pipe Throughput 12440.0 > > > > > > 23058600.5 18535.9 > > > > > > Pipe-based Context Switching 4000.0 > > > > > > 2835617.7 7089.0 > > > > > > Process Creation126.0 > > > > > > 12537.3995.0 > > > > > > Shell Scripts (1 concurrent) 42.4 > > > > > > 57057.4 13456.9 > > > > > > Shell Scripts (8 concurrent) 6.0 > > > > > > 7367.1 12278.5 > > > > > > System Call Overhead 15000.0 > > > > > > 33308301.3 22205.5 > > > > > > > > > > > > > > > > > > System Benchmarks Index Score > > > > > > 12426.1 > > 
> > > > > > > > > > ticket_spinlock: > > > > > > System Benchmarks Index Values BASELINE > > > > > > RESULTINDEX > > > > > > Dhrystone 2 using register variables 116700.0 > > > > > > 626541701.9 53688.2 > > > > > > Double-Precision Whetstone 55.0 > > > > > > 181921.0 33076.5 > > > > > > Execl Throughput 43.0 > > > > > > 12625.1 2936.1 > > > > > > File Copy 1024 bufsize 2000 maxblocks 3960.0 > > > > > > 6553792.9 16550.0 > > > > > > File Copy 256 bufsize 500 maxblocks1655.0 > > > > > > 3189231.6 19270.3 > > > > > > File Copy 4096 bufsize 8000 maxblocks 5800.0 > > > > > > 7221277.0 12450.5 > > > > > > Pipe Throughput 12440.0 > > > > > > 20594018.7 16554.7 > > > > > > Pipe-based Context Switching 4000.0 > > > > > > 2571117.7 6427.8 > > > > > > Process Creation126.0 > > > > > > 10798.4857.0 > > > > > > Shell Scripts (1 concurrent) 42.4 > > > > > > 57227.5 13497.1 > > > > > > Shell Scripts (8 concurrent) 6.0 > > >
Re: [PATCH V11 12/17] RISC-V: paravirt: pvqspinlock: Add nopvspin kernel parameter
On Fri, Sep 15, 2023 at 2:05 PM Leonardo Bras wrote: > > On Sun, Sep 10, 2023 at 04:29:06AM -0400, guo...@kernel.org wrote: > > From: Guo Ren > > > > Disables the qspinlock slow path using PV optimizations which > > allow the hypervisor to 'idle' the guest on lock contention. > > > > Signed-off-by: Guo Ren > > Signed-off-by: Guo Ren > > --- > > Documentation/admin-guide/kernel-parameters.txt | 2 +- > > arch/riscv/kernel/qspinlock_paravirt.c | 13 + > > 2 files changed, 14 insertions(+), 1 deletion(-) > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt > > b/Documentation/admin-guide/kernel-parameters.txt > > index f75bedc50e00..e74aed631573 100644 > > --- a/Documentation/admin-guide/kernel-parameters.txt > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > @@ -3857,7 +3857,7 @@ > > as generic guest with no PV drivers. Currently support > > XEN HVM, KVM, HYPER_V and VMWARE guest. > > > > - nopvspin[X86,XEN,KVM] > > + nopvspin[X86,XEN,KVM,RISC-V] > > Disables the qspinlock slow path using PV > > optimizations > > which allow the hypervisor to 'idle' the guest on lock > > contention. > > diff --git a/arch/riscv/kernel/qspinlock_paravirt.c > > b/arch/riscv/kernel/qspinlock_paravirt.c > > index 85ff5a3ec234..a0ad4657f437 100644 > > --- a/arch/riscv/kernel/qspinlock_paravirt.c > > +++ b/arch/riscv/kernel/qspinlock_paravirt.c > > @@ -41,8 +41,21 @@ EXPORT_STATIC_CALL(pv_queued_spin_lock_slowpath); > > DEFINE_STATIC_CALL(pv_queued_spin_unlock, native_queued_spin_unlock); > > EXPORT_STATIC_CALL(pv_queued_spin_unlock); > > > > +static bool nopvspin; > > It is only used in init, so it makes sense to add __initdata. > > static bool nopvspin __initdata; Okay. > > Other than that, LGTM: > Reviewed-by: Leonardo Bras > > Thanks! 
> Leo > > > +static __init int parse_nopvspin(char *arg) > > +{ > > + nopvspin = true; > > + return 0; > > +} > > +early_param("nopvspin", parse_nopvspin); > > + > > void __init pv_qspinlock_init(void) > > { > > + if (nopvspin) { > > + pr_info("PV qspinlocks disabled\n"); > > + return; > > + } > > + > > if (num_possible_cpus() == 1) > > return; > > > > -- > > 2.36.1 > > > -- Best Regards Guo Ren
Re: [PATCH V11 13/17] RISC-V: paravirt: pvqspinlock: Add SBI implementation
On Fri, Sep 15, 2023 at 2:23 PM Leonardo Bras wrote: > > On Sun, Sep 10, 2023 at 04:29:07AM -0400, guo...@kernel.org wrote: > > From: Guo Ren > > > > Implement pv_kick with SBI implementation, and add SBI_EXT_PVLOCK > > extension detection. > > > > Signed-off-by: Guo Ren > > Signed-off-by: Guo Ren > > --- > > arch/riscv/include/asm/sbi.h | 6 ++ > > arch/riscv/kernel/qspinlock_paravirt.c | 7 ++- > > 2 files changed, 12 insertions(+), 1 deletion(-) > > > > diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h > > index e0233b3d7a5f..3533f8d4f3e2 100644 > > --- a/arch/riscv/include/asm/sbi.h > > +++ b/arch/riscv/include/asm/sbi.h > > @@ -30,6 +30,7 @@ enum sbi_ext_id { > > SBI_EXT_HSM = 0x48534D, > > SBI_EXT_SRST = 0x53525354, > > SBI_EXT_PMU = 0x504D55, > > + SBI_EXT_PVLOCK = 0xAB0401, > > > > /* Experimentals extensions must lie within this range */ > > SBI_EXT_EXPERIMENTAL_START = 0x0800, > > @@ -243,6 +244,11 @@ enum sbi_pmu_ctr_type { > > /* Flags defined for counter stop function */ > > #define SBI_PMU_STOP_FLAG_RESET (1 << 0) > > > > +/* SBI PVLOCK (kick cpu out of wfi) */ > > +enum sbi_ext_pvlock_fid { > > + SBI_EXT_PVLOCK_KICK_CPU = 0, > > +}; > > + > > #define SBI_SPEC_VERSION_DEFAULT 0x1 > > #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 > > #define SBI_SPEC_VERSION_MAJOR_MASK 0x7f > > diff --git a/arch/riscv/kernel/qspinlock_paravirt.c > > b/arch/riscv/kernel/qspinlock_paravirt.c > > index a0ad4657f437..571626f350be 100644 > > --- a/arch/riscv/kernel/qspinlock_paravirt.c > > +++ b/arch/riscv/kernel/qspinlock_paravirt.c > > @@ -11,6 +11,8 @@ > > > > void pv_kick(int cpu) > > { > > + sbi_ecall(SBI_EXT_PVLOCK, SBI_EXT_PVLOCK_KICK_CPU, > > + cpuid_to_hartid_map(cpu), 0, 0, 0, 0, 0); > > return; > > } > > > > @@ -25,7 +27,7 @@ void pv_wait(u8 *ptr, u8 val) > > if (READ_ONCE(*ptr) != val) > > goto out; > > > > - /* wait_for_interrupt(); */ > > + wait_for_interrupt(); > > out: > > local_irq_restore(flags); > > } > > @@ -62,6 +64,9 @@ void __init 
pv_qspinlock_init(void) > > if(sbi_get_firmware_id() != SBI_EXT_BASE_IMPL_ID_KVM) > > return; > > + if (!sbi_probe_extension(SBI_EXT_PVLOCK)) > > + return; > > + pr_info("PV qspinlocks enabled\n"); > > __pv_init_lock_hash(); > > -- > > 2.36.1 > > > > IIUC this PVLOCK extension is now a requirement to use pv_qspinlock(), and > it allows a cpu to use an instruction to wait for interrupt in pv_wait(), > and kicks it out of this wait using a new sbi_ecall() on pv_kick(). Yes. > > Overall it LGTM, but would be nice to have the reference doc in the commit > msg. I ended up inferring some of the inner workings from your implementation, > which is not ideal for reviewing. I would improve the commit msg in the next version of the patch. > > If understanding above is right, > Reviewed-by: Leonardo Bras > > Thanks! > Leo > -- Best Regards Guo Ren
Re: [PATCH V11 08/17] riscv: qspinlock: Add virt_spin_lock() support for KVM guest
On Thu, Sep 14, 2023 at 4:02 PM Leonardo Bras wrote: > > On Sun, Sep 10, 2023 at 04:29:02AM -0400, guo...@kernel.org wrote: > > From: Guo Ren > > > > Add a static key controlling whether virt_spin_lock() should be > > called or not. When running on bare metal set the new key to > > false. > > > > The KVM guests fall back to a Test-and-Set spinlock, because fair > > locks have horrible lock 'holder' preemption issues. The > > virt_spin_lock_key would shortcut for the > > queued_spin_lock_slowpath() function that allow virt_spin_lock to > > hijack it. > > > > Signed-off-by: Guo Ren > > Signed-off-by: Guo Ren > > --- > > .../admin-guide/kernel-parameters.txt | 4 +++ > > arch/riscv/include/asm/sbi.h | 8 + > > arch/riscv/include/asm/spinlock.h | 22 ++ > > arch/riscv/kernel/sbi.c | 2 +- > > arch/riscv/kernel/setup.c | 30 ++- > > 5 files changed, 64 insertions(+), 2 deletions(-) > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt > > b/Documentation/admin-guide/kernel-parameters.txt > > index 61cacb8dfd0e..f75bedc50e00 100644 > > --- a/Documentation/admin-guide/kernel-parameters.txt > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > @@ -3927,6 +3927,10 @@ > > no_uaccess_flush > > [PPC] Don't flush the L1-D cache after accessing user > > data. > > > > + no_virt_spin[RISC-V] Disable virt_spin_lock in KVM guest to use > > + native_queued_spinlock when the nopvspin option is > > enabled. > > + This would help vcpu=pcpu scenarios. > > + > > novmcoredd [KNL,KDUMP] > > Disable device dump. 
Device dump allows drivers to > > append dump data to vmcore so you can collect driver > > diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h > > index 501e06e52078..e0233b3d7a5f 100644 > > --- a/arch/riscv/include/asm/sbi.h > > +++ b/arch/riscv/include/asm/sbi.h > > @@ -50,6 +50,13 @@ enum sbi_ext_base_fid { > > SBI_EXT_BASE_GET_MIMPID, > > }; > > > > +enum sbi_ext_base_impl_id { > > + SBI_EXT_BASE_IMPL_ID_BBL = 0, > > + SBI_EXT_BASE_IMPL_ID_OPENSBI, > > + SBI_EXT_BASE_IMPL_ID_XVISOR, > > + SBI_EXT_BASE_IMPL_ID_KVM, > > +}; > > + > > enum sbi_ext_time_fid { > > SBI_EXT_TIME_SET_TIMER = 0, > > }; > > @@ -269,6 +276,7 @@ int sbi_console_getchar(void); > > long sbi_get_mvendorid(void); > > long sbi_get_marchid(void); > > long sbi_get_mimpid(void); > > +long sbi_get_firmware_id(void); > > void sbi_set_timer(uint64_t stime_value); > > void sbi_shutdown(void); > > void sbi_send_ipi(unsigned int cpu); > > diff --git a/arch/riscv/include/asm/spinlock.h > > b/arch/riscv/include/asm/spinlock.h > > index 8ea0fee80652..6b38d6616f14 100644 > > --- a/arch/riscv/include/asm/spinlock.h > > +++ b/arch/riscv/include/asm/spinlock.h > > @@ -4,6 +4,28 @@ > > #define __ASM_RISCV_SPINLOCK_H > > > > #ifdef CONFIG_QUEUED_SPINLOCKS > > +/* > > + * The KVM guests fall back to a Test-and-Set spinlock, because fair locks > > + * have horrible lock 'holder' preemption issues. The virt_spin_lock_key > > + * would shortcut for the queued_spin_lock_slowpath() function that allow > > + * virt_spin_lock to hijack it. 
> > + */ > > +DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); > > + > > +#define virt_spin_lock virt_spin_lock > > +static inline bool virt_spin_lock(struct qspinlock *lock) > > +{ > > + if (!static_branch_likely(&virt_spin_lock_key)) > > + return false; > > + > > + do { > > + while (atomic_read(&lock->val) != 0) > > + cpu_relax(); > > + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); > > + > > + return true; > > +} > > + > > #define _Q_PENDING_LOOPS (1 << 9) > > #endif > > > > diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c > > index 88eea3a99ee0..cdd45edc8db4 100644 > > --- a/arch/riscv/kernel/sbi.c > > +++ b/arch/riscv/kernel/sbi.c > > @@ -555,7 +555,7 @@ static inline long sbi_get_spec_version(void) > > return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION); > > } > > > > -static inline long sbi_get_firmware_id(void) > > +long sbi_get_firmware_id(void) > > { > > return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_ID); > > } > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c > > index 0f084f037651..c57d15b05160 100644 > > --- a/arch/riscv/kernel/setup.c > > +++ b/arch/riscv/kernel/setup.c > > @@ -26,6 +26,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > #include > > @@ -283,16 +284,43 @@ DEFINE_STATIC_KEY_TRUE(combo_qspinlock_key); > > EXPORT_SYMBOL(combo_qspinlock_key); > > #endif > > > > +#ifdef CONFIG_QUEUED_SPINLOCKS > > +static bool no_virt_spin_key = false; > > I suggest no _key, also there is no need for "= false". > To be consistent
Re: [PATCH V11 09/17] riscv: qspinlock: errata: Add ERRATA_THEAD_WRITE_ONCE fixup
On Thu, Sep 14, 2023 at 4:32 PM Leonardo Bras wrote: > > On Sun, Sep 10, 2023 at 04:29:03AM -0400, guo...@kernel.org wrote: > > From: Guo Ren > > > > The early version of T-Head C9xx cores has a store merge buffer > > delay problem. The store merge buffer could improve the store queue > > performance by merging multi-store requests, but when there are not > > continued store requests, the prior single store request would be > > waiting in the store queue for a long time. That would cause > > significant problems for communication between multi-cores. This > > problem was found on sg2042 & th1520 platforms with the qspinlock > > lock torture test. > > > > So appending a fence w.o could immediately flush the store merge > > buffer and let other cores see the write result. > > > > This will apply the WRITE_ONCE errata to handle the non-standard > > behavior via appending a fence w.o instruction for WRITE_ONCE(). > > > > Signed-off-by: Guo Ren > > Signed-off-by: Guo Ren > > --- > > arch/riscv/Kconfig.errata | 19 +++ > > arch/riscv/errata/thead/errata.c | 20 > > arch/riscv/include/asm/errata_list.h | 13 - > > arch/riscv/include/asm/rwonce.h| 24 > > arch/riscv/include/asm/vendorid_list.h | 14 ++ > > include/asm-generic/rwonce.h | 2 ++ > > 6 files changed, 79 insertions(+), 13 deletions(-) > > create mode 100644 arch/riscv/include/asm/rwonce.h > > > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata > > index 1aa85a427ff3..c919cc3f1a3a 100644 > > --- a/arch/riscv/Kconfig.errata > > +++ b/arch/riscv/Kconfig.errata > > @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU > > > > If you don't know what to do here, say "Y". > > > > +config ERRATA_THEAD_WRITE_ONCE > > + bool "Apply T-Head WRITE_ONCE errata" > > + depends on ERRATA_THEAD > > + default y > > + help > > + The early version of T-Head C9xx cores has a store merge buffer > > + delay problem. 
The store merge buffer could improve the store queue > > + performance by merging multi-store requests, but when there are no > > + continued store requests, the prior single store request would be > > + waiting in the store queue for a long time. That would cause > > + significant problems for communication between multi-cores. > > Appending > > + a fence w.o could immediately flush the store merge buffer and let > > + other cores see the write result. > > + > > + This will apply the WRITE_ONCE errata to handle the non-standard > > + behavior via appending a fence w.o instruction for WRITE_ONCE(). > > + > > + If you don't know what to do here, say "Y". > > + > > endmenu # "CPU errata selection" > > diff --git a/arch/riscv/errata/thead/errata.c > > b/arch/riscv/errata/thead/errata.c > > index be84b14f0118..751eb5a7f614 100644 > > --- a/arch/riscv/errata/thead/errata.c > > +++ b/arch/riscv/errata/thead/errata.c > > @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage, > > return true; > > } > > > > +static bool errata_probe_write_once(unsigned int stage, > > + unsigned long arch_id, unsigned long > > impid) > > +{ > > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE)) > > + return false; > > + > > + /* target-c9xx cores report arch_id and impid as 0 */ > > + if (arch_id != 0 || impid != 0) > > + return false; > > + > > + if (stage == RISCV_ALTERNATIVES_BOOT || > > + stage == RISCV_ALTERNATIVES_MODULE) > > + return true; > > + > > + return false; > > +} > > + > > static u32 thead_errata_probe(unsigned int stage, > > unsigned long archid, unsigned long impid) > > { > > @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage, > > if (errata_probe_pmu(stage, archid, impid)) > > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); > > > > + if (errata_probe_write_once(stage, archid, impid)) > > + cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE); > > + > > return cpu_req_errata; > > } > > > > diff --git a/arch/riscv/include/asm/errata_list.h > > 
b/arch/riscv/include/asm/errata_list.h > > index 712cab7adffe..fbb2b8d39321 100644 > > --- a/arch/riscv/include/asm/errata_list.h > > +++ b/arch/riscv/include/asm/errata_list.h > > @@ -11,19 +11,6 @@ > > #include > > #include > > > > -#ifdef CONFIG_ERRATA_SIFIVE > > -#define ERRATA_SIFIVE_CIP_453 0 > > -#define ERRATA_SIFIVE_CIP_1200 1 > > -#define ERRATA_SIFIVE_NUMBER 2 > > -#endif > > - > > -#ifdef CONFIG_ERRATA_THEAD > > -#define ERRATA_THEAD_PBMT 0 > > -#define ERRATA_THEAD_CMO 1 > > -#define ERRATA_THEAD_PMU 2 > > -#define ERRATA_THEAD_NUMBER 3 > > -#endif > > - > > Here I understand you are moving stuff from errata_list.h to > vendorid_list.h. Wouldn't it be better to do