Get rid of the x86 SSE code for atomic 64-bit writes to doorbell registers. Saving and restoring CR0 plus executing a clts instruction is too expensive for the SSE path to ever be a win, and the config option was just confusing.
Signed-off-by: Roland Dreier <[EMAIL PROTECTED]> --- linux-bk.orig/drivers/infiniband/hw/mthca/Kconfig 2005-01-23 08:30:27.000000000 -0800 +++ linux-bk/drivers/infiniband/hw/mthca/Kconfig 2005-01-23 21:00:44.744520064 -0800 @@ -14,13 +14,3 @@ This option causes the mthca driver produce a bunch of debug messages. Select this is you are developing the driver or trying to diagnose a problem. - -config INFINIBAND_MTHCA_SSE_DOORBELL - bool "SSE doorbell code" - depends on INFINIBAND_MTHCA && X86 && !X86_64 - default n - ---help--- - This option will have the mthca driver use SSE instructions - to ring hardware doorbell registers. This may improve - performance for some workloads, but the driver will not run - on processors without SSE instructions. --- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 20:58:55.771086544 -0800 +++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 21:00:44.745519912 -0800 @@ -40,10 +40,6 @@ #include <linux/pci.h> #include <linux/interrupt.h> -#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL -#include <asm/cpufeature.h> -#endif - #include "mthca_dev.h" #include "mthca_config_reg.h" #include "mthca_cmd.h" @@ -1117,22 +1113,6 @@ { int ret; - /* - * TODO: measure whether dynamically choosing doorbell code at - * runtime affects our performance. Is there a "magic" way to - * choose without having to follow a function pointer every - * time we ring a doorbell? - */ -#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL - if (!cpu_has_xmm) { - printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n"); - printk(KERN_ERR PFX "the current CPU does not support SSE.\n"); - printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL " - "and recompile.\n"); - return -ENODEV; - } -#endif - ret = pci_register_driver(&mthca_driver); return ret < 0 ? 
ret : 0; } --- linux-bk.orig/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-01-23 08:30:38.000000000 -0800 +++ linux-bk/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-01-23 21:00:44.746519760 -0800 @@ -32,9 +32,7 @@ * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $ */ -#include <linux/config.h> #include <linux/types.h> -#include <linux/preempt.h> #define MTHCA_RD_DOORBELL 0x00 #define MTHCA_SEND_DOORBELL 0x10 @@ -59,51 +57,13 @@ __raw_writeq(*(u64 *) val, dest); } -#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL) -/* Use SSE to write 64 bits atomically without a lock. */ - -#define MTHCA_DECLARE_DOORBELL_LOCK(name) -#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0) -#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL) - -static inline unsigned long mthca_get_fpu(void) -{ - unsigned long cr0; - - preempt_disable(); - asm volatile("mov %%cr0,%0; clts" : "=r" (cr0)); - return cr0; -} - -static inline void mthca_put_fpu(unsigned long cr0) -{ - asm volatile("mov %0,%%cr0" : : "r" (cr0)); - preempt_enable(); -} - -static inline void mthca_write64(u32 val[2], void __iomem *dest, - spinlock_t *doorbell_lock) -{ - /* i386 stack is aligned to 8 bytes, so this should be OK: */ - u8 xmmsave[8] __attribute__((aligned(8))); - unsigned long cr0; - - cr0 = mthca_get_fpu(); - - asm volatile ( - "movlps %%xmm0,(%0); \n\t" - "movlps (%1),%%xmm0; \n\t" - "movlps %%xmm0,(%2); \n\t" - "movlps (%0),%%xmm0; \n\t" - : - : "r" (xmmsave), "r" (val), "r" (dest) - : "memory" ); - - mthca_put_fpu(cr0); -} - #else -/* Just fall back to a spinlock to protect the doorbell */ + +/* + * Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. 
+ */ #define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name; #define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/