On Sat, Nov 22, 2008 at 03:05:22PM -0700, Scott Long wrote:
> A neat hack would be for the kernel linker to scan the text and do a
> drop-in replacement of the opcode that is appropriate for the platform.
> I can't see how a CPU_XXX definition would work because it's just a
> compile time construct, one that can be included with any kernel
> compile.

Yes, it is possible to do that. A less drastic change is to check the
CPU features directly at run time. I moved the slow code to a separate
section to eliminate the unconditional jump in the fast path.
Only compile-tested.

diff --git a/sys/conf/ldscript.i386 b/sys/conf/ldscript.i386
index a94f32f..49d9636 100644
--- a/sys/conf/ldscript.i386
+++ b/sys/conf/ldscript.i386
@@ -45,6 +45,7 @@ SECTIONS
   .text      :
   {
     *(.text)
+    *(.text.offpath)
     *(.stub)
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index f6bcf0c..5806d41 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,11 +32,38 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
-
+#ifdef _KERNEL
 #if defined(I686_CPU)
-#define mb()   __asm__ __volatile__ ("mfence;": : :"memory")
-#define wmb()  __asm__ __volatile__ ("sfence;": : :"memory")
-#define rmb()  __asm__ __volatile__ ("lfence;": : :"memory")
+#define mb()   __asm__ __volatile__ (                  \
+       "testl\t$0x04000000,cpu_feature\n"              \
+       "\tje\t2f\n"                                    \
+       "\tmfence\n"                                    \
+       "1:\n"                                          \
+       "\t.section\t.text.offpath\n"                   \
+       "2:\tlock;addl\t$0,cpu_feature\n"               \
+       "\tjmp\t1b\n"                                   \
+       "\t.text"                                       \
+       : : :"memory")
+#define wmb()  __asm__ __volatile__ (                  \
+       "testl\t$0x02000000,cpu_feature\n"              \
+       "\tje\t2f\n"                                    \
+       "\tsfence\n"                                    \
+       "1:\n"                                          \
+       "\t.section\t.text.offpath\n"                   \
+       "2:\tlock;addl\t$0,cpu_feature\n"               \
+       "\tjmp\t1b\n"                                   \
+       "\t.text"                                       \
+       : : :"memory")
+#define rmb()  __asm__ __volatile__ (                  \
+       "testl\t$0x04000000,cpu_feature\n"              \
+       "\tje\t2f\n"                                    \
+       "\tlfence\n"                                    \
+       "1:\n"                                          \
+       "\t.section\t.text.offpath\n"                   \
+       "2:\tlock;addl\t$0,cpu_feature\n"               \
+       "\tjmp\t1b\n"                                   \
+       "\t.text"                                       \
+       : : :"memory")
 #else
 /*
  * do we need a serializing instruction?
@@ -45,7 +72,7 @@
 #define wmb()
 #define rmb()
 #endif
-
+#endif
 
 /*
  * Various simple operations on memory, each of which is atomic in the

Attachment: pgpnsZga0lo9O.pgp
Description: PGP signature

Reply via email to