Module Name:    src
Committed By:   riastradh
Date:           Thu Apr 21 12:06:32 UTC 2022

Modified Files:
        src/common/lib/libc/arch/mips/atomic: membar_ops.S
        src/sys/arch/mips/include: asm.h

Log Message:
mips/cavium: Take advantage of Octeon's guaranteed r/rw ordering.


To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/common/lib/libc/arch/mips/atomic/membar_ops.S
cvs rdiff -u -r1.70 -r1.71 src/sys/arch/mips/include/asm.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/common/lib/libc/arch/mips/atomic/membar_ops.S
diff -u src/common/lib/libc/arch/mips/atomic/membar_ops.S:1.12 src/common/lib/libc/arch/mips/atomic/membar_ops.S:1.13
--- src/common/lib/libc/arch/mips/atomic/membar_ops.S:1.12	Sat Apr  9 23:32:51 2022
+++ src/common/lib/libc/arch/mips/atomic/membar_ops.S	Thu Apr 21 12:06:31 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: membar_ops.S,v 1.12 2022/04/09 23:32:51 riastradh Exp $	*/
+/*	$NetBSD: membar_ops.S,v 1.13 2022/04/21 12:06:31 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
@@ -38,44 +38,80 @@ LEAF(_membar_sync)
 	j	ra
 	 BDSYNC
 END(_membar_sync)
+ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
+
+STRONG_ALIAS(_membar_enter,_membar_sync)
+ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
 
 #ifdef __OCTEON__
+
+/*
+ * cnMIPS guarantees load-before-load/store ordering without any
+ * barriers.  So the only barriers we need are store-before-load (sync)
+ * and store-before-store (syncw, i.e., sync 4).  See Table 2-32
+ * `Execution Ordering Rules' on p. 104 of Cavium OCTEON III CN78XX
+ * Hardware Reference Manual, CN78XX-HM-0.99E, September 2014:
+ *
+ *	First Operation		DLD [load instruction to a physical
+ *				address that is L2/DRAM]
+ *	Second Operation	Any
+ *	Execution Ordering Comments
+ *
+ *		The second operation cannot appear to execute before
+ *		the first (DLD) operation, regardless of the presence
+ *		or absence of SYNC* instructions.
+ *
+ * Note: I'm not sure if this applies to earlier cnMIPS -- can't find
+ * it in the Cavium Networks OCTEON Plus CN50XX Hardware Reference
+ * Manual CN50XX-HM-0.99E, July 2008.  Experimentally, on an erlite3
+ * (Cavium Octeon CN5020-500), I can easily detect reordering of
+ * store-before-store and store-before-load, but I haven't been able to
+ * detect any reordering of load-before-load or load-before-store.
+ *
+ * Note: On early cnMIPS (CN3xxx), there is an erratum which sometimes
+ * requires issuing two syncw's in a row.  I don't know the details --
+ * don't have documentation -- and in Linux it is only used for I/O
+ * purposes.
+ *
+ * Currently we don't build kernels that work on both Octeon and
+ * non-Octeon MIPS CPUs, so none of this is done with binary patching.
+ * For userlands we could use a separate shared library on Octeon with
+ * ld.so.conf to override the symbols with cheaper definitions, but we
+ * don't do that now.
+ */
+
+LEAF(_membar_acquire)
+	j	ra
+	 nop
+END(_membar_acquire)
+ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire)
+
+STRONG_ALIAS(_membar_consumer,_membar_acquire)
+ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire)
+
 LEAF(_membar_release)
-	/*
-	 * syncw is documented as ordering store-before-store in
-	 *
-	 *	Cavium OCTEON III CN78XX Hardware Reference Manual,
-	 *	CN78XX-HM-0.99E, September 2014.
-	 *
-	 * It's unclear from the documentation the architecture
-	 * guarantees load-before-store ordering without barriers, but
-	 * this code assumes it does.  If that assumption is wrong, we
-	 * can only use syncw for membar_producer -- membar_release has
-	 * to use the full sync.
-	 */
 	j	ra
 	 syncw
 END(_membar_release)
-#endif
+ATOMIC_OP_ALIAS(membar_release,_membar_release)
 
-ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
-ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
-STRONG_ALIAS(_membar_acquire,_membar_sync)
-ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
-STRONG_ALIAS(_membar_enter,_membar_sync)
-#ifdef __OCTEON__
-ATOMIC_OP_ALIAS(membar_exit,_membar_release)
 STRONG_ALIAS(_membar_exit,_membar_release)
-ATOMIC_OP_ALIAS(membar_release,_membar_release)
-ATOMIC_OP_ALIAS(membar_producer,_membar_release)
+ATOMIC_OP_ALIAS(membar_exit,_membar_release)
+
 STRONG_ALIAS(_membar_producer,_membar_release)
-#else
-ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
-STRONG_ALIAS(_membar_exit,_membar_sync)
-ATOMIC_OP_ALIAS(membar_release,_membar_sync)
+ATOMIC_OP_ALIAS(membar_producer,_membar_release)
+
+#else  /* !__OCTEON__ */
+
+STRONG_ALIAS(_membar_acquire,_membar_sync)
+ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
 STRONG_ALIAS(_membar_release,_membar_sync)
-ATOMIC_OP_ALIAS(membar_producer,_membar_sync)
+ATOMIC_OP_ALIAS(membar_release,_membar_sync)
+STRONG_ALIAS(_membar_exit,_membar_sync)
+ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
+STRONG_ALIAS(_membar_consumer,_membar_sync)
+ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
 STRONG_ALIAS(_membar_producer,_membar_sync)
+ATOMIC_OP_ALIAS(membar_producer,_membar_sync)
+
 #endif
-ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
-STRONG_ALIAS(_membar_consumer,_membar_sync)

Index: src/sys/arch/mips/include/asm.h
diff -u src/sys/arch/mips/include/asm.h:1.70 src/sys/arch/mips/include/asm.h:1.71
--- src/sys/arch/mips/include/asm.h:1.70	Sat Apr  9 14:09:32 2022
+++ src/sys/arch/mips/include/asm.h	Thu Apr 21 12:06:31 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: asm.h,v 1.70 2022/04/09 14:09:32 riastradh Exp $	*/
+/*	$NetBSD: asm.h,v 1.71 2022/04/21 12:06:31 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
@@ -574,12 +574,21 @@ _C_LABEL(x):
 
 /* compiler define */
 #if defined(__OCTEON__)
-				/* early cnMIPS have erratum which means 2 */
-#define	LLSCSYNC	sync 4; sync 4
+/*
+ * See common/lib/libc/arch/mips/atomic/membar_ops.S for notes on
+ * Octeon memory ordering guarantees and barriers.
+ *
+ * cnMIPS also has a quirk where the store buffer can get clogged and
+ * we need to apply a plunger to it _after_ releasing a lock or else
+ * other CPUs may spin for hundreds of thousands of cycles before they
+ * see the lock is released.  So we also have the quirky SYNC_PLUNGER
+ * barrier as syncw.
+ */
+#define	LLSCSYNC	/* nothing */
 #define	BDSYNC		sync
-#define	BDSYNC_ACQ	sync
-#define	SYNC_ACQ	sync
-#define	SYNC_REL	sync
+#define	BDSYNC_ACQ	nop
+#define	SYNC_ACQ	/* nothing */
+#define	SYNC_REL	sync 4
 #define	BDSYNC_PLUNGER	sync 4
 #define	SYNC_PLUNGER	sync 4
 #elif __mips >= 3 || !defined(__mips_o32)

Reply via email to