Module Name: src Committed By: martin Date: Sat Jul 20 16:11:27 UTC 2024
Modified Files: src/common/lib/libc/arch/i386/atomic [netbsd-10]: atomic.S src/common/lib/libc/arch/x86_64/atomic [netbsd-10]: atomic.S src/sys/arch/amd64/amd64 [netbsd-10]: cpufunc.S src/sys/arch/i386/i386 [netbsd-10]: cpufunc.S src/sys/arch/xen/include [netbsd-10]: hypervisor.h xenring.h Log Message: Pull up following revision(s) (requested by riastradh in ticket #764): common/lib/libc/arch/i386/atomic/atomic.S: revision 1.37 sys/arch/xen/include/xenring.h: revision 1.8 sys/arch/i386/i386/cpufunc.S: revision 1.52 sys/arch/amd64/amd64/cpufunc.S: revision 1.68 sys/arch/xen/include/hypervisor.h: revision 1.60 common/lib/libc/arch/x86_64/atomic/atomic.S: revision 1.30 xen: Don't hotpatch away LOCK prefix in xen_mb, even on UP boots. Both xen_mb and membar_sync are designed to provide store-before-load ordering, but xen_mb has to provide it in synchronizing guest with hypervisor, while membar_sync only has to provide it in synchronizing one (guest) CPU with another (guest) CPU. It is safe to hotpatch away the LOCK prefix in membar_sync on a uniprocessor boot because membar_sync is only designed to coordinate between normal memory on multiple CPUs, and is never necessary when there's only one CPU involved. But xen_mb is used to coordinate between the guest and the `device' implemented by a hypervisor, which might be running on another _physical_ CPU even if the NetBSD guest only sees one `CPU', i.e., one _virtual_ CPU. So even on `uniprocessor' boots, xen_mb must still issue an instruction with store-before-load ordering on multiprocessor systems, such as a LOCK ADD (or MFENCE, but MFENCE is costlier for no benefit here). No need to change xen_wmb (release ordering, load/store-before-store) or xen_rmb (acquire ordering, load-before-load/store) because every x86 store is a store-release and every x86 load is a load-acquire, even on multiprocessor systems, so there's no hotpatching involved anyway. 
PR kern/57199

To generate a diff of this commit:
cvs rdiff -u -r1.36 -r1.36.2.1 src/common/lib/libc/arch/i386/atomic/atomic.S
cvs rdiff -u -r1.29 -r1.29.2.1 \
    src/common/lib/libc/arch/x86_64/atomic/atomic.S
cvs rdiff -u -r1.65 -r1.65.18.1 src/sys/arch/amd64/amd64/cpufunc.S
cvs rdiff -u -r1.49 -r1.49.20.1 src/sys/arch/i386/i386/cpufunc.S
cvs rdiff -u -r1.55.4.3 -r1.55.4.4 src/sys/arch/xen/include/hypervisor.h
cvs rdiff -u -r1.6.20.1 -r1.6.20.2 src/sys/arch/xen/include/xenring.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/common/lib/libc/arch/i386/atomic/atomic.S diff -u src/common/lib/libc/arch/i386/atomic/atomic.S:1.36 src/common/lib/libc/arch/i386/atomic/atomic.S:1.36.2.1 --- src/common/lib/libc/arch/i386/atomic/atomic.S:1.36 Sat Jul 30 14:11:00 2022 +++ src/common/lib/libc/arch/i386/atomic/atomic.S Sat Jul 20 16:11:26 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: atomic.S,v 1.36 2022/07/30 14:11:00 riastradh Exp $ */ +/* $NetBSD: atomic.S,v 1.36.2.1 2024/07/20 16:11:26 martin Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -211,6 +211,8 @@ ENTRY(_membar_sync) * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/ * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ * https://www.agner.org/optimize/instruction_tables.pdf + * + * Sync with xen_mb in sys/arch/i386/i386/cpufunc.S. */ LOCK addl $0, -4(%esp) Index: src/common/lib/libc/arch/x86_64/atomic/atomic.S diff -u src/common/lib/libc/arch/x86_64/atomic/atomic.S:1.29 src/common/lib/libc/arch/x86_64/atomic/atomic.S:1.29.2.1 --- src/common/lib/libc/arch/x86_64/atomic/atomic.S:1.29 Sat Jul 30 14:11:00 2022 +++ src/common/lib/libc/arch/x86_64/atomic/atomic.S Sat Jul 20 16:11:27 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: atomic.S,v 1.29 2022/07/30 14:11:00 riastradh Exp $ */ +/* $NetBSD: atomic.S,v 1.29.2.1 2024/07/20 16:11:27 martin Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -279,6 +279,8 @@ ENTRY(_membar_sync) * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/ * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ * https://www.agner.org/optimize/instruction_tables.pdf + * + * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S. 
*/ LOCK addq $0, -8(%rsp) Index: src/sys/arch/amd64/amd64/cpufunc.S diff -u src/sys/arch/amd64/amd64/cpufunc.S:1.65 src/sys/arch/amd64/amd64/cpufunc.S:1.65.18.1 --- src/sys/arch/amd64/amd64/cpufunc.S:1.65 Mon Nov 30 17:02:27 2020 +++ src/sys/arch/amd64/amd64/cpufunc.S Sat Jul 20 16:11:26 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.S,v 1.65 2020/11/30 17:02:27 bouyer Exp $ */ +/* $NetBSD: cpufunc.S,v 1.65.18.1 2024/07/20 16:11:26 martin Exp $ */ /* * Copyright (c) 1998, 2007, 2008, 2020 The NetBSD Foundation, Inc. @@ -60,6 +60,28 @@ ENTRY(x86_mfence) ret END(x86_mfence) +#ifdef XEN +ENTRY(xen_mb) + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but without hotpatching + * away the LOCK prefix on uniprocessor boots -- because under + * Xen, we still have to coordinate with a `device' backed by a + * hypervisor that is potentially on another physical CPU even + * if we observe only one virtual CPU as the guest. + * + * See common/lib/libc/arch/x86_64/atomic/atomic.S for + * rationale and keep this in sync with the implementation + * of membar_sync there. + */ + lock + addq $0,-8(%rsp) + ret +END(xen_mb) +#endif /* XEN */ + #ifndef XENPV ENTRY(invlpg) #ifdef SVS Index: src/sys/arch/i386/i386/cpufunc.S diff -u src/sys/arch/i386/i386/cpufunc.S:1.49 src/sys/arch/i386/i386/cpufunc.S:1.49.20.1 --- src/sys/arch/i386/i386/cpufunc.S:1.49 Sun Jul 19 07:35:08 2020 +++ src/sys/arch/i386/i386/cpufunc.S Sat Jul 20 16:11:26 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.S,v 1.49 2020/07/19 07:35:08 maxv Exp $ */ +/* $NetBSD: cpufunc.S,v 1.49.20.1 2024/07/20 16:11:26 martin Exp $ */ /*- * Copyright (c) 1998, 2007, 2020 The NetBSD Foundation, Inc. 
@@ -38,7 +38,7 @@ #include <sys/errno.h> #include <machine/asm.h> -__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.49 2020/07/19 07:35:08 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.49.20.1 2024/07/20 16:11:26 martin Exp $"); #include "opt_xen.h" @@ -65,6 +65,28 @@ ENTRY(x86_mfence) ret END(x86_mfence) +#ifdef XEN +ENTRY(xen_mb) + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but without hotpatching + * away the LOCK prefix on uniprocessor boots -- because under + * Xen, we still have to coordinate with a `device' backed by a + * hypervisor that is potentially on another physical CPU even + * if we observe only one virtual CPU as the guest. + * + * See common/lib/libc/arch/i386/atomic/atomic.S for + * rationale and keep this in sync with the implementation + * of membar_sync there. + */ + lock + addl $0,-4(%esp) + ret +END(xen_mb) +#endif /* XEN */ + #ifndef XENPV ENTRY(lidt) movl 4(%esp), %eax Index: src/sys/arch/xen/include/hypervisor.h diff -u src/sys/arch/xen/include/hypervisor.h:1.55.4.3 src/sys/arch/xen/include/hypervisor.h:1.55.4.4 --- src/sys/arch/xen/include/hypervisor.h:1.55.4.3 Wed Oct 18 16:53:03 2023 +++ src/sys/arch/xen/include/hypervisor.h Sat Jul 20 16:11:26 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: hypervisor.h,v 1.55.4.3 2023/10/18 16:53:03 martin Exp $ */ +/* $NetBSD: hypervisor.h,v 1.55.4.4 2024/07/20 16:11:26 martin Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. 
@@ -112,7 +112,7 @@ struct xen_npx_attach_args { #undef xen_rmb #undef xen_wmb -#define xen_mb() membar_sync() +void xen_mb(void); #define xen_rmb() membar_acquire() #define xen_wmb() membar_release() #endif /* __XEN_INTERFACE_VERSION */ Index: src/sys/arch/xen/include/xenring.h diff -u src/sys/arch/xen/include/xenring.h:1.6.20.1 src/sys/arch/xen/include/xenring.h:1.6.20.2 --- src/sys/arch/xen/include/xenring.h:1.6.20.1 Mon Jul 31 15:23:02 2023 +++ src/sys/arch/xen/include/xenring.h Sat Jul 20 16:11:26 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: xenring.h,v 1.6.20.1 2023/07/31 15:23:02 martin Exp $ */ +/* $NetBSD: xenring.h,v 1.6.20.2 2024/07/20 16:11:26 martin Exp $ */ /* * Glue goop for xbd ring request/response protocol structures. @@ -24,7 +24,7 @@ #undef xen_rmb #undef xen_wmb -#define xen_mb() membar_sync() +void xen_mb(void); #define xen_rmb() membar_acquire() #define xen_wmb() membar_release()