Hi,

As I've said before, I've been working on MIPS support
for Open MPI, as the current implementation is
IRIX-specific in places. Well, it is finally done, and
I present to you fixes for Linux on MIPS, some fixes
for bugs in the atomic operations on the MIPS platform,
and a workaround for GCC only running the preprocessor
(and therefore expanding macros) on assembly files
whose extension is .S rather than .s.

Enjoy!

Jonathan Day


__________________________________________________
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 
--- ompi-original/opal/include/opal/sys/mips/atomic.h	2006-05-10 17:30:39.000000000 +0000
+++ patched/opal/include/opal/sys/mips/atomic.h	2006-05-11 02:12:28.000000000 +0000
@@ -47,14 +47,20 @@

 #define OPAL_HAVE_ATOMIC_CMPSET_32 1
 #define OPAL_HAVE_ATOMIC_CMPSET_64 1
+#define OPAL_HAVE_ATOMIC_MATH_32 0
+#define OPAL_HAVE_ATOMIC_ADD_32 1
+#define OPAL_HAVE_ATOMIC_SUB_32 1
+#define OPAL_HAVE_ATOMIC_ADD_64 1
+#define OPAL_HAVE_ATOMIC_SUB_64 1


+#if OMPI_GCC_INLINE_ASSEMBLY
+
 /**********************************************************************
  *
  * Memory Barriers
  *
  *********************************************************************/
-#if OMPI_GCC_INLINE_ASSEMBLY

 static inline
 void opal_atomic_mb(void)
@@ -76,14 +82,11 @@
     WMB();
 }

-#endif
-
 /**********************************************************************
  *
  * Atomic math operations
  *
  *********************************************************************/
-#if OMPI_GCC_INLINE_ASSEMBLY

 static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
                                         int32_t oldval, int32_t newval)
@@ -92,19 +95,22 @@
     int32_t tmp;

    __asm__ __volatile__ ("\t"
-                         ".set noreorder        \n"
-                         "1:                \n\t"
-                         "ll     %0, %2         \n\t" /* load *addr into ret */
-                         "bne    %0, %3, 2f   \n\t" /* done if oldval != ret */
-                         "or     %5, %4, 0      \n\t" /* ret = newval */
-                         "sc     %5, %2         \n\t" /* store ret in *addr */
-                         /* note: ret will be 0 if failed, 1 if succeeded */
-			 "bne    %5, 1, 1b   \n\t"
-                         "2:                 \n\t"
-                         ".set reorder          \n"
-                         : "=&r"(ret), "=m"(*addr)
-                         : "m"(*addr), "r"(oldval), "r"(newval), "r"(tmp)
+                         ".set noreorder\n"
+                         "1:\n\t"
+                         "ll      %0, 0(%2)\n\t"    /* load-linked *addr into ret */
+                         "bne     %0, %3, 2f\n\t"   /* return 0 if oldval != ret */
+                         "or      %1, $0, %4\n\t"   /* tmp = newval */
+                         "sc      %1, 0(%2)\n\t"    /* store-conditional tmp into *addr */
+                         /* note: tmp will be 0 if store failed, 1 if succeeded */
+                         "beq     %1, $0, 1b\n\t"   /* repeat if tmp == 0 */
+                         "nop\n\t"
+                         "sync\n"
+                         "2:\n\t"
+                         ".set reorder\n"
+                         : "=&r"(ret), "=&r"(tmp)
+                         : "r"(addr), "r"(oldval), "r"(newval)
                          : "cc", "memory");
+
    return (ret == oldval);
 }

@@ -141,19 +147,20 @@
     int64_t tmp;

    __asm__ __volatile__ ("\t"
-                         ".set noreorder        \n"
-                         "1:                \n\t"
-                         "lld    %0, %2         \n\t" /* load *addr into ret */
-                         "bne    %0, %3, 2f   \n\t" /* done if oldval != ret */
-                         "or     %5, %4, 0      \n\t" /* tmp = newval */
-                         "scd    %5, %2         \n\t" /* store tmp in *addr */
-                         /* note: ret will be 0 if failed, 1 if succeeded */
-			 "bne    %5, 1, 1b   \n"
-                         "2:                 \n\t"
-                         ".set reorder          \n"
-                         : "=&r" (ret), "=m" (*addr)
-                         : "m" (*addr), "r" (oldval), "r" (newval),
-			   "r"(tmp)
+                         ".set noreorder\n"
+                         "1:\n\t"
+                         "lld     %0, 0(%2)\n\t"    /* load-linked *addr into ret */
+                         "bne     %0, %3, 2f\n\t"   /* return 0 if oldval != ret */
+                         "or      %1, $0, %4\n\t"   /* tmp = newval */
+                         "scd     %1, 0(%2)\n\t"    /* store-conditional tmp into *addr */
+                         /* note: tmp will be 0 if store failed, 1 if succeeded */
+                         "beq     %1, $0, 1b\n\t"   /* repeat if tmp == 0 */
+                         "nop\n\t"
+                         "sync\n"
+                         "2:\n\t"
+                         ".set reorder\n"
+                         : "=&r"(ret), "=&r"(tmp)
+                         : "r"(addr), "r"(oldval), "r"(newval)
                          : "cc", "memory");

    return (ret == oldval);
@@ -184,6 +191,114 @@
     return opal_atomic_cmpset_64(addr, oldval, newval);
 }

+#if ! OPAL_HAVE_ATOMIC_MATH_32
+static inline
+#endif
+int32_t opal_atomic_add_32(volatile int32_t *addr, int delta)
+{   /* Atomically performs *addr += delta and returns the value that was stored. */
+    int32_t ret;   /* loaded value, then the sum; this is the return value */
+    int32_t tmp;   /* copy of the sum handed to sc; sc overwrites it with its status */
+   /* Retry the ll/sc sequence until the conditional store succeeds. */
+   __asm__ __volatile__ ("\t"
+                         ".set noreorder\n"
+                         "1:\n\t"
+                         "ll      %0, 0(%2)\n\t"    /* load-linked ret = *addr */
+                         "addu    %0, %0, %3\n\t"   /* ret += delta */
+                         "or      %1, %0, $0\n\t"   /* tmp = ret */
+                         "sc      %1, 0(%2)\n\t"    /* store-conditional *addr = tmp */
+                         /* note: tmp will be 0 if store failed, 1 if succeeded */
+                         "beq     %1, $0, 1b\n\t"   /* repeat if store-conditional failed */
+                         "nop\n\t"                  /* fills the branch delay slot (noreorder mode) */
+                         "sync\n"                   /* barrier, reached only after a successful store */
+                         ".set reorder\n"
+                         : "=&r"(ret), "=&r"(tmp)   /* %0 = ret, %1 = tmp (early-clobber outputs) */
+                         : "r"(addr), "r"(delta)    /* %2 = addr, %3 = delta */
+                         : "cc", "memory");
+   /* ret still holds the sum computed on the successful iteration */
+   return ret;
+} /* opal_atomic_add_32 */
+
+#if ! OPAL_HAVE_ATOMIC_MATH_32
+static inline
+#endif
+int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta)
+{   /* Atomically performs *addr -= delta and returns the value that was stored. */
+    int32_t ret;   /* loaded value, then the difference; this is the return value */
+    int32_t tmp;   /* copy of the difference handed to sc; sc overwrites it with its status */
+   /* Retry the ll/sc sequence until the conditional store succeeds. */
+   __asm__ __volatile__ ("\t"
+                         ".set noreorder\n"
+                         "1:\n\t"
+                         "ll      %0, 0(%2)\n\t"    /* load-linked ret = *addr */
+                         "subu    %0, %0, %3\n\t"   /* ret -= delta */
+                         "or      %1, %0, $0\n\t"   /* tmp = ret */
+                         "sc      %1, 0(%2)\n\t"    /* store-conditional *addr = tmp */
+                         /* note: tmp will be 0 if store failed, 1 if succeeded */
+                         "beq     %1, $0, 1b\n\t"   /* repeat if store-conditional failed */
+                         "nop\n\t"                  /* fills the branch delay slot (noreorder mode) */
+                         "sync\n"                   /* barrier, reached only after a successful store */
+                         ".set reorder\n"
+                         : "=&r"(ret), "=&r"(tmp)   /* %0 = ret, %1 = tmp (early-clobber outputs) */
+                         : "r"(addr), "r"(delta)    /* %2 = addr, %3 = delta */
+                         : "cc", "memory");
+   /* ret still holds the difference computed on the successful iteration */
+   return ret;
+} /* opal_atomic_sub_32 */
+
+#if OPAL_HAVE_ATOMIC_CMPSET_64 /* NOTE(review): the 32-bit add/sub key this on !OPAL_HAVE_ATOMIC_MATH_32 -- confirm this guard is intended */
+static inline
+#endif
+int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta)
+{   /* Atomically performs *addr += delta (64-bit) and returns the value that was stored. */
+    int64_t ret;   /* loaded value, then the sum; this is the return value */
+    int64_t tmp;   /* copy of the sum handed to scd; scd overwrites it with its status */
+   /* Retry the lld/scd sequence until the conditional store succeeds. */
+   __asm__ __volatile__ ("\t"
+                         ".set noreorder\n"
+                         "1:\n\t"
+                         "lld     %0, 0(%2)\n\t"    /* load-linked ret = *addr */
+                         "daddu   %0, %0, %3\n\t"   /* ret += delta */
+                         "or      %1, %0, $0\n\t"   /* tmp = ret */
+                         "scd     %1, 0(%2)\n\t"    /* store-conditional *addr = tmp */
+                         /* note: tmp will be 0 if store failed, 1 if succeeded */
+                         "beq     %1, $0, 1b\n\t"   /* repeat if store-conditional failed */
+                         "nop\n\t"                  /* fills the branch delay slot (noreorder mode) */
+                         "sync\n"                   /* barrier, reached only after a successful store */
+                         ".set reorder\n"
+                         : "=&r"(ret), "=&r"(tmp)   /* %0 = ret, %1 = tmp (early-clobber outputs) */
+                         : "r"(addr), "r"(delta)    /* %2 = addr, %3 = delta */
+                         : "cc", "memory");
+   /* ret still holds the sum computed on the successful iteration */
+   return ret;
+} /* opal_atomic_add_64 */
+
+#if OPAL_HAVE_ATOMIC_CMPSET_64 /* NOTE(review): the 32-bit add/sub key this on !OPAL_HAVE_ATOMIC_MATH_32 -- confirm this guard is intended */
+static inline
+#endif
+int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta)
+{   /* Atomically performs *addr -= delta (64-bit) and returns the value that was stored. */
+    int64_t ret;   /* loaded value, then the difference; this is the return value */
+    int64_t tmp;   /* copy of the difference handed to scd; scd overwrites it with its status */
+   /* Retry the lld/scd sequence until the conditional store succeeds. */
+   __asm__ __volatile__ ("\t"
+                         ".set noreorder\n"
+                         "1:\n\t"
+                         "lld     %0, 0(%2)\n\t"    /* load-linked ret = *addr */
+                         "dsubu   %0, %0, %3\n\t"   /* ret -= delta */
+                         "or      %1, %0, $0\n\t"   /* tmp = ret */
+                         "scd     %1, 0(%2)\n\t"    /* store-conditional *addr = tmp */
+                         /* note: tmp will be 0 if store failed, 1 if succeeded */
+                         "beq     %1, $0, 1b\n\t"   /* repeat if store-conditional failed */
+                         "nop\n\t"                  /* fills the branch delay slot (noreorder mode) */
+                         "sync\n"                   /* barrier, reached only after a successful store */
+                         ".set reorder\n"
+                         : "=&r"(ret), "=&r"(tmp)   /* %0 = ret, %1 = tmp (early-clobber outputs) */
+                         : "r"(addr), "r"(delta)    /* %2 = addr, %3 = delta */
+                         : "cc", "memory");
+   /* ret still holds the difference computed on the successful iteration */
+   return ret;
+} /* opal_atomic_sub_64 */
+
 #endif /* OMPI_GCC_INLINE_ASSEMBLY */

 #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */
--- ompi-original/opal/asm/Makefile.am	2006-05-10 17:30:27.000000000 +0000
+++ patched/opal/asm/Makefile.am	2006-05-10 21:30:56.000000000 +0000
@@ -31,6 +31,14 @@

 atomic-asm.s: generated/@OMPI_ASM_FILE@
 	rm -f atomic-asm.s
+	@ if test "$(OMPI_ASM_PRECOMPILE)" = "1" ; then \
+		cmd="mv generated/@OMPI_ASM_FILE@ generated/original.S" ; \
+	    	echo "$$cmd" ; \
+		eval $$cmd ; \
+		cmd="$(CC) -D$(OMPI_ASM_ASM_H) -E generated/original.S > generated/@OMPI_ASM_FILE@" ; \
+	    	echo "$$cmd" ; \
+		eval $$cmd ; \
+	fi
 	@ if test -f "$(top_builddir)/opal/asm/generated/@OMPI_ASM_FILE@" ; then \
 	    cmd="ln -s \"$(top_builddir)/opal/asm/generated/@OMPI_ASM_FILE@\" atomic-asm.s" ; \
 	    echo "$$cmd" ; \
--- ompi-original/opal/asm/base/MIPS.asm	2006-05-10 17:30:27.000000000 +0000
+++ patched/opal/asm/base/MIPS.asm	2006-05-11 18:41:56.000000000 +0000
@@ -1,130 +1,195 @@
 START_FILE

+#ifdef OMPI_ASM_ASM_H
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#else
 #include <asm.h>
 #include <regdef.h>
-	
-	TEXT
+#endif
+        
+        TEXT

-	ALIGN(8)
+        ALIGN(8)
 LEAF(opal_atomic_mb)
-	sync
-	j	ra
+        sync                    /* barrier before the return: outside .set noreorder the assembler fills the jump's delay slot itself */
+        j       $31
 END(opal_atomic_mb)

-	
-	ALIGN(8)
+        
+        ALIGN(8)
 LEAF(opal_atomic_rmb)
-	sync
-	j	ra
+        sync                    /* barrier before the return: outside .set noreorder the assembler fills the jump's delay slot itself */
+        j       $31
 END(opal_atomic_rmb)
-	
-	
+        
+        
 LEAF(opal_atomic_wmb)
-	sync
-	j	ra
+        sync                    /* barrier before the return: outside .set noreorder the assembler fills the jump's delay slot itself */
+        j       $31
 END(opal_atomic_wmb)


 LEAF(opal_atomic_cmpset_32)
-	.set noreorder        
-retry1:                
-	ll     $3, 0($4)         
-	bne    $3, $5, done1   
-	or     $2, $6, 0      
-	sc     $2, 0($4)         
-	bne    $2, 1, retry1
-done1:                 
-	.set reorder          
-
-	xor	$3,$3,$5
-	j	ra
-	sltu	$2,$3,1
+.set noreorder
+retry1:
+        ll      $3, 0($4)
+        bne     $3, $5, done1
+        or      $2, $0, $6
+        sc      $2, 0($4)
+        beq     $2, $0, retry1
+        nop
+        j       $31
+        sync
+done1:
+        j       $31
+        or      $2, $0, $0
+.set reorder
 END(opal_atomic_cmpset_32)


 LEAF(opal_atomic_cmpset_acq_32)
-	.set noreorder        
-retry2:                
-	ll     $3, 0($4)         
-	bne    $3, $5, done2   
-	or     $2, $6, 0      
-	sc     $2, 0($4)         
-	bne    $2, 1, retry2   
-done2:                 
-	sync
-	.set reorder          
-
-	xor	$3,$3,$5
-	j	ra
-	sltu	$2,$3,1
+.set noreorder
+retry2:
+        ll      $3, 0($4)
+        bne     $3, $5, done2
+        or      $2, $0, $6
+        sc      $2, 0($4)
+        beq     $2, $0, retry2
+        nop
+        j       $31
+        sync
+done2:
+        j       $31
+        or      $2, $0, $0
+.set reorder
 END(opal_atomic_cmpset_acq_32)

-	
+        
 LEAF(opal_atomic_cmpset_rel_32)
-	.set noreorder        
-	sync
-retry3:                
-	ll     $3, 0($4)         
-	bne    $3, $5, done3   
-	or     $2, $6, 0      
-	sc     $2, 0($4)         
-	bne    $2, 1, retry3   
-done3:                 
-	.set reorder          
-
-	xor	$3,$3,$5
-	j	ra
-	sltu	$2,$3,1
+.set noreorder
+        sync                    /* release: order earlier memory operations before the ll/sc update */
+retry3: ll      $3, 0($4)       /* $3 = *addr (load-linked) */
+        bne     $3, $5, done3   /* mismatch with oldval: return 0 */
+        or      $2, $0, $6      /* delay slot: $2 = newval */
+        sc      $2, 0($4)       /* try to store newval; $2 = 1 on success, 0 on failure */
+        beq     $2, $0, retry3  /* lost the reservation: retry */
+        nop
+        j       $31             /* success: return 1 (left in $2 by sc) */
+        sync
+done3:
+        j       $31
+        or      $2, $0, $0      /* delay slot: return 0 */
+.set reorder
 END(opal_atomic_cmpset_rel_32)
-	
-	
+        
+        
 LEAF(opal_atomic_cmpset_64)
-		.set noreorder        
-retry4:                
-	lld    $3, 0($4)         
-	bne    $3, $5, done4   
-	or     $2, $6, 0      
-	scd    $2, 0($4)         
-	bne    $2, 1, retry4   
-done4:                 
-	.set reorder          
-
-	xor	$4,$3,$5
-	j	ra
-	sltu	$3,$4,1
+.set noreorder
+retry4:
+        lld     $3, 0($4)
+        bne     $3, $5, done4
+        or      $2, $0, $6
+        scd     $2, 0($4)
+        beq     $2, $0, retry4
+        nop
+        j       $31
+        sync
+done4:
+        j       $31
+        or      $2, $0, $0
+.set reorder
 END(opal_atomic_cmpset_64)


 LEAF(opal_atomic_cmpset_acq_64)
-	.set noreorder        
-retry5:                
-	lld    $3, 0($4)         
-	bne    $3, $5, done5   
-	or     $2, $6, 0      
-	scd    $2, 0($4)         
-	bne    $2, 1, retry5   
-done5:                 
-	.set reorder          
-	sync
-	xor	$4,$3,$5
-	j	ra
-	sltu	$3,$4,1
+.set noreorder
+retry5:
+        lld     $3, 0($4)
+        bne     $3, $5, done5
+        or      $2, $0, $6
+        scd     $2, 0($4)
+        beq     $2, $0, retry5
+        nop
+        j       $31
+        sync
+done5:
+        j       $31
+        or      $2, $0, $0
+.set reorder
 END(opal_atomic_cmpset_acq_64)


 LEAF(opal_atomic_cmpset_rel_64)
-	.set noreorder        
-	sync
-retry6:                
-	lld    $3, 0($4)         
-	bne    $3, $5, done6   
-	or     $2, $6, 0      
-	scd    $2, 0($4)         
-	bne    $2, 1, retry6   
-done6:                 
-	.set reorder          
-
-	xor	$4,$3,$5
-	j	ra
-	sltu	$3,$4,1
+.set noreorder
+        sync                    /* release: order earlier memory operations before the lld/scd update */
+retry6: lld     $3, 0($4)       /* $3 = *addr (load-linked, 64-bit) */
+        bne     $3, $5, done6   /* mismatch with oldval: return 0 */
+        or      $2, $0, $6      /* delay slot: $2 = newval */
+        scd     $2, 0($4)       /* try to store newval; $2 = 1 on success, 0 on failure */
+        beq     $2, $0, retry6  /* lost the reservation: retry */
+        nop
+        j       $31             /* success: return 1 (left in $2 by scd) */
+        sync
+done6:
+        j       $31
+        or      $2, $0, $0      /* delay slot: return 0 */
+.set reorder
 END(opal_atomic_cmpset_rel_64)
+
+LEAF(opal_atomic_add_32)
+.set noreorder
+retry7:
+        ll      $2, 0($4)
+        addu    $2, $2, $5
+        or      $3, $2, $0
+        sc      $3, 0($4)
+        beq     $3, $0, retry7
+        nop
+        j       $31
+        sync
+.set reorder
+END(opal_atomic_add_32)
+
+LEAF(opal_atomic_sub_32)
+.set noreorder
+retry8:
+        ll      $2, 0($4)
+        subu    $2, $2, $5
+        or      $3, $2, $0
+        sc      $3, 0($4)
+        beq     $3, $0, retry8
+        nop
+        j       $31
+        sync
+.set reorder
+END(opal_atomic_sub_32)
+
+LEAF(opal_atomic_add_64)
+.set noreorder
+retry9:
+        lld     $2, 0($4)
+        daddu   $2, $2, $5
+        or      $3, $2, $0
+        scd     $3, 0($4)
+        beq     $3, $0, retry9
+        nop
+        j       $31
+        sync
+.set reorder
+END(opal_atomic_add_64)
+
+LEAF(opal_atomic_sub_64)
+.set noreorder
+retry10:
+        lld     $2, 0($4)
+        dsubu   $2, $2, $5
+        or      $3, $2, $0
+        scd     $3, 0($4)
+        beq     $3, $0, retry10
+        nop
+        j       $31
+        sync
+.set reorder
+END(opal_atomic_sub_64)
+
--- ompi-original/configure.ac	2006-05-10 17:32:24.000000000 +0000
+++ patched/configure.ac	2006-05-10 18:29:38.000000000 +0000
@@ -994,6 +994,12 @@
 AC_DEFINE_UNQUOTED(OMPI_WANT_LIBLTDL, $WANT_LIBLTDL,
     [Whether to include support for libltdl or not])

+############################################################################
+# special-case quirks, kept isolated from the generic tests and operations
+############################################################################
+AC_SUBST(OMPI_ASM_PRECOMPILE) dnl opal/asm/Makefile.am reads this as a make variable
+AC_SUBST(OMPI_ASM_ASM_H) dnl passed as -D<name> when precompiling the assembly
+AM_CONDITIONAL(OMPI_ASM_PRECOMPILE, test "$OMPI_ASM_PRECOMPILE" = "1") 

 ############################################################################
 # final compiler config
--- ompi-original/ompi/mca/io/romio/romio/configure.in	2006-05-10 17:31:36.000000000 +0000
+++ patched/ompi/mca/io/romio/romio/configure.in	2006-05-10 19:08:26.000000000 +0000
@@ -540,6 +540,10 @@
         MPI=mpich
         mpi_mpich=1
     fi
+    cputype=`uname -m`
+    if test "$cputype" = "mips64" ; then  # quoted so an empty uname result cannot break test
+	MIPS=3
+    fi
 fi    
 #
 if test -n "$arch_SX4" ; then
--- ompi-original/config/ompi_config_asm.m4	2006-05-10 17:32:01.000000000 +0000
+++ patched/config/ompi_config_asm.m4	2006-05-10 21:29:02.000000000 +0000
@@ -822,12 +822,16 @@
             OMPI_GCC_INLINE_ASSIGN='"bis zero,zero,%0" : "=&r"(ret)'
             ;;

-        mips-*|mips64-*)
+        mips-*|mips64*)
             # Should really find some way to make sure that we are on
             # a MIPS III machine (r4000 and later)
             ompi_cv_asm_arch="MIPS"
             OMPI_ASM_SUPPORT_64BIT=1
             OMPI_GCC_INLINE_ASSIGN='"or %0,[$]0,[$]0" : "=&r"(ret)'
+	    case "$host_os" in linux*)  # canonical host_os is e.g. linux-gnu, never plain linux
+		OMPI_ASM_ASM_H="OMPI_ASM_ASM_H"
+		OMPI_ASM_PRECOMPILE=1
+	    ;; esac
             ;;

         powerpc-*|powerpc64-*)

Reply via email to