Module Name:    src
Committed By:   martin
Date:           Sat Jul 20 16:11:27 UTC 2024

Modified Files:
        src/common/lib/libc/arch/i386/atomic [netbsd-10]: atomic.S
        src/common/lib/libc/arch/x86_64/atomic [netbsd-10]: atomic.S
        src/sys/arch/amd64/amd64 [netbsd-10]: cpufunc.S
        src/sys/arch/i386/i386 [netbsd-10]: cpufunc.S
        src/sys/arch/xen/include [netbsd-10]: hypervisor.h xenring.h

Log Message:
Pull up following revision(s) (requested by riastradh in ticket #764):

        common/lib/libc/arch/i386/atomic/atomic.S: revision 1.37
        sys/arch/xen/include/xenring.h: revision 1.8
        sys/arch/i386/i386/cpufunc.S: revision 1.52
        sys/arch/amd64/amd64/cpufunc.S: revision 1.68
        sys/arch/xen/include/hypervisor.h: revision 1.60
        common/lib/libc/arch/x86_64/atomic/atomic.S: revision 1.30

xen: Don't hotpatch away LOCK prefix in xen_mb, even on UP boots.

Both xen_mb and membar_sync are designed to provide store-before-load
ordering, but xen_mb has to provide it in synchronizing guest with
hypervisor, while membar_sync only has to provide it in synchronizing
one (guest) CPU with another (guest) CPU.

It is safe to hotpatch away the LOCK prefix in membar_sync on a
uniprocessor boot because membar_sync is only designed to coordinate
between normal memory on multiple CPUs, and is never necessary when
there's only one CPU involved.

But xen_mb is used to coordinate between the guest and the `device'
implemented by a hypervisor, which might be running on another
_physical_ CPU even if the NetBSD guest only sees one `CPU', i.e.,
one _virtual_ CPU.  So even on `uniprocessor' boots, xen_mb must
still issue an instruction, such as a LOCK ADD (or MFENCE, but MFENCE
is costlier for no benefit here), that provides store-before-load
ordering on multiprocessor systems.

No need to change xen_wmb (release ordering, load/store-before-store)
or xen_rmb (acquire ordering, load-before-load/store) because every
x86 store is a store-release and every x86 load is a load-acquire,
even on multiprocessor systems, so there's no hotpatching involved
anyway.

PR kern/57199


To generate a diff of this commit:
cvs rdiff -u -r1.36 -r1.36.2.1 src/common/lib/libc/arch/i386/atomic/atomic.S
cvs rdiff -u -r1.29 -r1.29.2.1 \
    src/common/lib/libc/arch/x86_64/atomic/atomic.S
cvs rdiff -u -r1.65 -r1.65.18.1 src/sys/arch/amd64/amd64/cpufunc.S
cvs rdiff -u -r1.49 -r1.49.20.1 src/sys/arch/i386/i386/cpufunc.S
cvs rdiff -u -r1.55.4.3 -r1.55.4.4 src/sys/arch/xen/include/hypervisor.h
cvs rdiff -u -r1.6.20.1 -r1.6.20.2 src/sys/arch/xen/include/xenring.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/common/lib/libc/arch/i386/atomic/atomic.S
diff -u src/common/lib/libc/arch/i386/atomic/atomic.S:1.36 src/common/lib/libc/arch/i386/atomic/atomic.S:1.36.2.1
--- src/common/lib/libc/arch/i386/atomic/atomic.S:1.36	Sat Jul 30 14:11:00 2022
+++ src/common/lib/libc/arch/i386/atomic/atomic.S	Sat Jul 20 16:11:26 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: atomic.S,v 1.36 2022/07/30 14:11:00 riastradh Exp $	*/
+/*	$NetBSD: atomic.S,v 1.36.2.1 2024/07/20 16:11:26 martin Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -211,6 +211,8 @@ ENTRY(_membar_sync)
 	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
 	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
 	 * https://www.agner.org/optimize/instruction_tables.pdf
+	 *
+	 * Sync with xen_mb in sys/arch/i386/i386/cpufunc.S.
 	 */
 	LOCK
 	addl	$0, -4(%esp)

Index: src/common/lib/libc/arch/x86_64/atomic/atomic.S
diff -u src/common/lib/libc/arch/x86_64/atomic/atomic.S:1.29 src/common/lib/libc/arch/x86_64/atomic/atomic.S:1.29.2.1
--- src/common/lib/libc/arch/x86_64/atomic/atomic.S:1.29	Sat Jul 30 14:11:00 2022
+++ src/common/lib/libc/arch/x86_64/atomic/atomic.S	Sat Jul 20 16:11:27 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: atomic.S,v 1.29 2022/07/30 14:11:00 riastradh Exp $	*/
+/*	$NetBSD: atomic.S,v 1.29.2.1 2024/07/20 16:11:27 martin Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -279,6 +279,8 @@ ENTRY(_membar_sync)
 	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
 	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
 	 * https://www.agner.org/optimize/instruction_tables.pdf
+	 *
+	 * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S.
 	 */
 	LOCK
 	addq	$0, -8(%rsp)

Index: src/sys/arch/amd64/amd64/cpufunc.S
diff -u src/sys/arch/amd64/amd64/cpufunc.S:1.65 src/sys/arch/amd64/amd64/cpufunc.S:1.65.18.1
--- src/sys/arch/amd64/amd64/cpufunc.S:1.65	Mon Nov 30 17:02:27 2020
+++ src/sys/arch/amd64/amd64/cpufunc.S	Sat Jul 20 16:11:26 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpufunc.S,v 1.65 2020/11/30 17:02:27 bouyer Exp $	*/
+/*	$NetBSD: cpufunc.S,v 1.65.18.1 2024/07/20 16:11:26 martin Exp $	*/
 
 /*
  * Copyright (c) 1998, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -60,6 +60,28 @@ ENTRY(x86_mfence)
 	ret
 END(x86_mfence)
 
+#ifdef XEN
+ENTRY(xen_mb)
+	/*
+	 * Store-before-load ordering with respect to matching logic
+	 * on the hypervisor side.
+	 *
+	 * This is the same as membar_sync, but without hotpatching
+	 * away the LOCK prefix on uniprocessor boots -- because under
+	 * Xen, we still have to coordinate with a `device' backed by a
+	 * hypervisor that is potentially on another physical CPU even
+	 * if we observe only one virtual CPU as the guest.
+	 *
+	 * See common/lib/libc/arch/x86_64/atomic/atomic.S for
+	 * rationale and keep this in sync with the implementation
+	 * of membar_sync there.
+	 */
+	lock
+	addq	$0,-8(%rsp)
+	ret
+END(xen_mb)
+#endif	/* XEN */
+
 #ifndef XENPV
 ENTRY(invlpg)
 #ifdef SVS

Index: src/sys/arch/i386/i386/cpufunc.S
diff -u src/sys/arch/i386/i386/cpufunc.S:1.49 src/sys/arch/i386/i386/cpufunc.S:1.49.20.1
--- src/sys/arch/i386/i386/cpufunc.S:1.49	Sun Jul 19 07:35:08 2020
+++ src/sys/arch/i386/i386/cpufunc.S	Sat Jul 20 16:11:26 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpufunc.S,v 1.49 2020/07/19 07:35:08 maxv Exp $	*/
+/*	$NetBSD: cpufunc.S,v 1.49.20.1 2024/07/20 16:11:26 martin Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2007, 2020 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
 #include <sys/errno.h>
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.49 2020/07/19 07:35:08 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpufunc.S,v 1.49.20.1 2024/07/20 16:11:26 martin Exp $");
 
 #include "opt_xen.h"
 
@@ -65,6 +65,28 @@ ENTRY(x86_mfence)
 	ret
 END(x86_mfence)
 
+#ifdef XEN
+ENTRY(xen_mb)
+	/*
+	 * Store-before-load ordering with respect to matching logic
+	 * on the hypervisor side.
+	 *
+	 * This is the same as membar_sync, but without hotpatching
+	 * away the LOCK prefix on uniprocessor boots -- because under
+	 * Xen, we still have to coordinate with a `device' backed by a
+	 * hypervisor that is potentially on another physical CPU even
+	 * if we observe only one virtual CPU as the guest.
+	 *
+	 * See common/lib/libc/arch/i386/atomic/atomic.S for
+	 * rationale and keep this in sync with the implementation
+	 * of membar_sync there.
+	 */
+	lock
+	addl	$0,-4(%esp)
+	ret
+END(xen_mb)
+#endif	/* XEN */
+
 #ifndef XENPV
 ENTRY(lidt)
 	movl	4(%esp), %eax

Index: src/sys/arch/xen/include/hypervisor.h
diff -u src/sys/arch/xen/include/hypervisor.h:1.55.4.3 src/sys/arch/xen/include/hypervisor.h:1.55.4.4
--- src/sys/arch/xen/include/hypervisor.h:1.55.4.3	Wed Oct 18 16:53:03 2023
+++ src/sys/arch/xen/include/hypervisor.h	Sat Jul 20 16:11:26 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: hypervisor.h,v 1.55.4.3 2023/10/18 16:53:03 martin Exp $	*/
+/*	$NetBSD: hypervisor.h,v 1.55.4.4 2024/07/20 16:11:26 martin Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -112,7 +112,7 @@ struct xen_npx_attach_args {
 #undef xen_rmb
 #undef xen_wmb
 
-#define xen_mb()  membar_sync()
+void xen_mb(void);
 #define xen_rmb() membar_acquire()
 #define xen_wmb() membar_release()
 #endif /* __XEN_INTERFACE_VERSION */

Index: src/sys/arch/xen/include/xenring.h
diff -u src/sys/arch/xen/include/xenring.h:1.6.20.1 src/sys/arch/xen/include/xenring.h:1.6.20.2
--- src/sys/arch/xen/include/xenring.h:1.6.20.1	Mon Jul 31 15:23:02 2023
+++ src/sys/arch/xen/include/xenring.h	Sat Jul 20 16:11:26 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: xenring.h,v 1.6.20.1 2023/07/31 15:23:02 martin Exp $ */
+/* $NetBSD: xenring.h,v 1.6.20.2 2024/07/20 16:11:26 martin Exp $ */
 
 /*
  * Glue goop for xbd ring request/response protocol structures.
@@ -24,7 +24,7 @@
 #undef xen_rmb
 #undef xen_wmb
 
-#define xen_mb()  membar_sync()
+void xen_mb(void);
 #define xen_rmb() membar_acquire()
 #define xen_wmb() membar_release()
 

Reply via email to