Hi, as I've said before, I've been working on MIPS support for OpenMPI, because the current implementation is IRIX-specific in places. It is finally done, and I present to you: fixes for Linux on MIPS, fixes for some bugs in the atomic operations on the MIPS platform, and a workaround for GCC not running the preprocessor on assembly files (so macros are not expanded) unless the file extension is .S rather than .s.
Enjoy! Jonathan Day __________________________________________________ Do You Yahoo!? Tired of spam? Yahoo! Mail has the best spam protection around http://mail.yahoo.com
--- ompi-original/opal/include/opal/sys/mips/atomic.h 2006-05-10 17:30:39.000000000 +0000 +++ patched/opal/include/opal/sys/mips/atomic.h 2006-05-11 02:12:28.000000000 +0000 @@ -47,14 +47,20 @@ #define OPAL_HAVE_ATOMIC_CMPSET_32 1 #define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_MATH_32 0 +#define OPAL_HAVE_ATOMIC_ADD_32 1 +#define OPAL_HAVE_ATOMIC_SUB_32 1 +#define OPAL_HAVE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_ATOMIC_SUB_64 1 +#if OMPI_GCC_INLINE_ASSEMBLY + /********************************************************************** * * Memory Barriers * *********************************************************************/ -#if OMPI_GCC_INLINE_ASSEMBLY static inline void opal_atomic_mb(void) @@ -76,14 +82,11 @@ WMB(); } -#endif - /********************************************************************** * * Atomic math operations * *********************************************************************/ -#if OMPI_GCC_INLINE_ASSEMBLY static inline int opal_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, int32_t newval) @@ -92,19 +95,22 @@ int32_t tmp; __asm__ __volatile__ ("\t" - ".set noreorder \n" - "1: \n\t" - "ll %0, %2 \n\t" /* load *addr into ret */ - "bne %0, %3, 2f \n\t" /* done if oldval != ret */ - "or %5, %4, 0 \n\t" /* ret = newval */ - "sc %5, %2 \n\t" /* store ret in *addr */ - /* note: ret will be 0 if failed, 1 if succeeded */ - "bne %5, 1, 1b \n\t" - "2: \n\t" - ".set reorder \n" - : "=&r"(ret), "=m"(*addr) - : "m"(*addr), "r"(oldval), "r"(newval), "r"(tmp) + ".set noreorder\n" + "1:\n\t" + "ll %0, 0(%2)\n\t" /* load-linked *addr into ret */ + "bne %0, %3, 2f\n\t" /* return 0 if oldval != ret */ + "or %1, $0, %4\n\t" /* tmp = newval */ + "sc %1, 0(%2)\n\t" /* store-conditional tmp into *addr */ + /* note: tmp will be 0 if store failed, 1 if succeeded */ + "beq %1, $0, 1b\n\t" /* repeat if tmp == 0 */ + "nop\n\t" + "sync\n" + "2:\n\t" + ".set reorder\n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(oldval), "r"(newval) : "cc", 
"memory"); + return (ret == oldval); } @@ -141,19 +147,20 @@ int64_t tmp; __asm__ __volatile__ ("\t" - ".set noreorder \n" - "1: \n\t" - "lld %0, %2 \n\t" /* load *addr into ret */ - "bne %0, %3, 2f \n\t" /* done if oldval != ret */ - "or %5, %4, 0 \n\t" /* tmp = newval */ - "scd %5, %2 \n\t" /* store tmp in *addr */ - /* note: ret will be 0 if failed, 1 if succeeded */ - "bne %5, 1, 1b \n" - "2: \n\t" - ".set reorder \n" - : "=&r" (ret), "=m" (*addr) - : "m" (*addr), "r" (oldval), "r" (newval), - "r"(tmp) + ".set noreorder\n" + "1:\n\t" + "lld %0, 0(%2)\n\t" /* load-linked *addr into ret */ + "bne %0, %3, 2f\n\t" /* return 0 if oldval != ret */ + "or %1, $0, %4\n\t" /* tmp = newval */ + "scd %1, 0(%2)\n\t" /* store-conditional tmp into *addr */ + /* note: tmp will be 0 if store failed, 1 if succeeded */ + "beq %1, $0, 1b\n\t" /* repeat if tmp == 0 */ + "nop\n\t" + "sync\n" + "2:\n\t" + ".set reorder\n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(oldval), "r"(newval) : "cc", "memory"); return (ret == oldval); @@ -184,6 +191,114 @@ return opal_atomic_cmpset_64(addr, oldval, newval); } +#if ! OPAL_HAVE_ATOMIC_MATH_32 +static inline +#endif +int32_t opal_atomic_add_32(volatile int32_t *addr, int delta) +{ + int32_t ret; + int32_t tmp; + + __asm__ __volatile__ ("\t" + ".set noreorder\n" + "1:\n\t" + "ll %0, 0(%2)\n\t" /* load-linked ret = *addr */ + "addu %0, %0, %3\n\t" /* ret += delta */ + "or %1, %0, $0\n\t" /* tmp = ret */ + "sc %1, 0(%2)\n\t" /* store-conditional *addr = tmp */ + /* note: tmp will be 0 if store failed, 1 if succeeded */ + "beq %1, $0, 1b\n\t" /* repeat if store-conditional failed */ + "nop\n\t" + "sync\n" + ".set reorder\n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(delta) + : "cc", "memory"); + + return ret; +} /* opal_atomic_add_32 */ + +#if ! 
OPAL_HAVE_ATOMIC_MATH_32 +static inline +#endif +int32_t opal_atomic_sub_32(volatile int32_t *addr, int delta) +{ + int32_t ret; + int32_t tmp; + + __asm__ __volatile__ ("\t" + ".set noreorder\n" + "1:\n\t" + "ll %0, 0(%2)\n\t" /* load-linked ret = *addr */ + "subu %0, %0, %3\n\t" /* ret -= delta */ + "or %1, %0, $0\n\t" /* tmp = ret */ + "sc %1, 0(%2)\n\t" /* store-conditional *addr = tmp */ + /* note: tmp will be 0 if store failed, 1 if succeeded */ + "beq %1, $0, 1b\n\t" /* repeat if store-conditional failed */ + "nop\n\t" + "sync\n" + ".set reorder\n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(delta) + : "cc", "memory"); + + return ret; +} /* opal_atomic_sub_32 */ + +#if OPAL_HAVE_ATOMIC_CMPSET_64 +static inline +#endif +int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) +{ + int64_t ret; + int64_t tmp; + + __asm__ __volatile__ ("\t" + ".set noreorder\n" + "1:\n\t" + "lld %0, 0(%2)\n\t" /* load-linked ret = *addr */ + "daddu %0, %0, %3\n\t" /* ret += delta */ + "or %1, %0, $0\n\t" /* tmp = ret */ + "scd %1, 0(%2)\n\t" /* store-conditional *addr = tmp */ + /* note: tmp will be 0 if store failed, 1 if succeeded */ + "beq %1, $0, 1b\n\t" /* repeat if store-conditional failed */ + "nop\n\t" + "sync\n" + ".set reorder\n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(delta) + : "cc", "memory"); + + return ret; +} /* opal_atomic_add_64 */ + +#if OPAL_HAVE_ATOMIC_CMPSET_64 +static inline +#endif +int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) +{ + int64_t ret; + int64_t tmp; + + __asm__ __volatile__ ("\t" + ".set noreorder\n" + "1:\n\t" + "lld %0, 0(%2)\n\t" /* load-linked ret = *addr */ + "dsubu %0, %0, %3\n\t" /* ret -= delta */ + "or %1, %0, $0\n\t" /* tmp = ret */ + "scd %1, 0(%2)\n\t" /* store-conditional *addr = tmp */ + /* note: tmp will be 0 if store failed, 1 if succeeded */ + "beq %1, $0, 1b\n\t" /* repeat if store-conditional failed */ + "nop\n\t" + "sync\n" + ".set reorder\n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), 
"r"(delta) + : "cc", "memory"); + + return ret; +} /* opal_atomic_sub_64 */ + #endif /* OMPI_GCC_INLINE_ASSEMBLY */ #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ --- ompi-original/opal/asm/Makefile.am 2006-05-10 17:30:27.000000000 +0000 +++ patched/opal/asm/Makefile.am 2006-05-10 21:30:56.000000000 +0000 @@ -31,6 +31,14 @@ atomic-asm.s: generated/@OMPI_ASM_FILE@ rm -f atomic-asm.s + @ if test "$(OMPI_ASM_PRECOMPILE)" = "1" ; then \ + cmd="mv generated/@OMPI_ASM_FILE@ generated/original.S" ; \ + echo "$$cmd" ; \ + eval $$cmd ; \ + cmd="$(CC) -D$(OMPI_ASM_ASM_H) -S generated/original.S > generated/@OMPI_ASM_FILE@" ; \ + echo "$$cmd" ; \ + eval $$cmd ; \ + fi @ if test -f "$(top_builddir)/opal/asm/generated/@OMPI_ASM_FILE@" ; then \ cmd="ln -s \"$(top_builddir)/opal/asm/generated/@OMPI_ASM_FILE@\" atomic-asm.s" ; \ echo "$$cmd" ; \ --- ompi-original/opal/asm/base/MIPS.asm 2006-05-10 17:30:27.000000000 +0000 +++ patched/opal/asm/base/MIPS.asm 2006-05-11 18:41:56.000000000 +0000 @@ -1,130 +1,195 @@ START_FILE +#ifdef OMPI_ASM_ASM_H +#include <asm/asm.h> +#include <asm/regdef.h> +#else #include <asm.h> #include <regdef.h> - - TEXT +#endif + + TEXT - ALIGN(8) + ALIGN(8) LEAF(opal_atomic_mb) - sync - j ra + j $31 + sync END(opal_atomic_mb) - - ALIGN(8) + + ALIGN(8) LEAF(opal_atomic_rmb) - sync - j ra + j $31 + sync END(opal_atomic_rmb) - - + + LEAF(opal_atomic_wmb) - sync - j ra + j $31 + sync END(opal_atomic_wmb) LEAF(opal_atomic_cmpset_32) - .set noreorder -retry1: - ll $3, 0($4) - bne $3, $5, done1 - or $2, $6, 0 - sc $2, 0($4) - bne $2, 1, retry1 -done1: - .set reorder - - xor $3,$3,$5 - j ra - sltu $2,$3,1 +.set noreorder +retry1: + ll $3, 0($4) + bne $3, $5, done1 + or $2, $0, $6 + sc $2, 0($4) + beq $2, $0, retry1 + nop + j $31 + sync +done1: + j $31 + or $2, $0, $0 +.set reorder END(opal_atomic_cmpset_32) LEAF(opal_atomic_cmpset_acq_32) - .set noreorder -retry2: - ll $3, 0($4) - bne $3, $5, done2 - or $2, $6, 0 - sc $2, 0($4) - bne $2, 1, retry2 -done2: - sync - .set 
reorder - - xor $3,$3,$5 - j ra - sltu $2,$3,1 +.set noreorder +retry2: + ll $3, 0($4) + bne $3, $5, done2 + or $2, $0, $6 + sc $2, 0($4) + beq $2, $0, retry2 + nop + j $31 + sync +done2: + j $31 + or $2, $0, $0 +.set reorder END(opal_atomic_cmpset_acq_32) - + LEAF(opal_atomic_cmpset_rel_32) - .set noreorder - sync -retry3: - ll $3, 0($4) - bne $3, $5, done3 - or $2, $6, 0 - sc $2, 0($4) - bne $2, 1, retry3 -done3: - .set reorder - - xor $3,$3,$5 - j ra - sltu $2,$3,1 +.set noreorder +retry3: + ll $3, 0($4) + bne $3, $5, done3 + or $2, $0, $6 + sc $2, 0($4) + beq $2, $0, retry3 + nop + j $31 + sync +done3: + j $31 + or $2, $0, $0 +.set reorder END(opal_atomic_cmpset_rel_32) - - + + LEAF(opal_atomic_cmpset_64) - .set noreorder -retry4: - lld $3, 0($4) - bne $3, $5, done4 - or $2, $6, 0 - scd $2, 0($4) - bne $2, 1, retry4 -done4: - .set reorder - - xor $4,$3,$5 - j ra - sltu $3,$4,1 +.set noreorder +retry4: + lld $3, 0($4) + bne $3, $5, done4 + or $2, $0, $6 + scd $2, 0($4) + beq $2, $0, retry4 + nop + j $31 + sync +done4: + j $31 + or $2, $0, $0 +.set reorder END(opal_atomic_cmpset_64) LEAF(opal_atomic_cmpset_acq_64) - .set noreorder -retry5: - lld $3, 0($4) - bne $3, $5, done5 - or $2, $6, 0 - scd $2, 0($4) - bne $2, 1, retry5 -done5: - .set reorder - sync - xor $4,$3,$5 - j ra - sltu $3,$4,1 +.set noreorder +retry5: + lld $3, 0($4) + bne $3, $5, done5 + or $2, $0, $6 + scd $2, 0($4) + beq $2, $0, retry5 + nop + j $31 + sync +done5: + j $31 + or $2, $0, $0 +.set reorder END(opal_atomic_cmpset_acq_64) LEAF(opal_atomic_cmpset_rel_64) - .set noreorder - sync -retry6: - lld $3, 0($4) - bne $3, $5, done6 - or $2, $6, 0 - scd $2, 0($4) - bne $2, 1, retry6 -done6: - .set reorder - - xor $4,$3,$5 - j ra - sltu $3,$4,1 +.set noreorder +retry6: + lld $3, 0($4) + bne $3, $5, done6 + or $2, $0, $6 + scd $2, 0($4) + beq $2, $0, retry6 + nop + j $31 + sync +done6: + j $31 + or $2, $0, $0 +.set reorder END(opal_atomic_cmpset_rel_64) + +LEAF(opal_atomic_add_32) +.set noreorder 
+retry7: + ll $2, 0($4) + addu $2, $2, $5 + or $3, $2, $0 + sc $3, 0($4) + beq $3, $0, retry7 + nop + j $31 + sync +.set reorder +END(opal_atomic_add_32) + +LEAF(opal_atomic_sub_32) +.set noreorder +retry8: + ll $2, 0($4) + subu $2, $2, $5 + or $3, $2, $0 + sc $3, 0($4) + beq $3, $0, retry8 + nop + j $31 + sync +.set reorder +END(opal_atomic_sub_32) + +LEAF(opal_atomic_add_64) +.set noreorder +retry9: + lld $2, 0($4) + daddu $2, $2, $5 + or $3, $2, $0 + scd $3, 0($4) + beq $3, $0, retry9 + nop + j $31 + sync +.set reorder +END(opal_atomic_add_64) + +LEAF(opal_atomic_sub_64) +.set noreorder +retry10: + lld $2, 0($4) + dsubu $2, $2, $5 + or $3, $2, $0 + scd $3, 0($4) + beq $3, $0, retry10 + nop + j $31 + sync +.set reorder +END(opal_atomic_sub_64) + --- ompi-original/configure.ac 2006-05-10 17:32:24.000000000 +0000 +++ patched/configure.ac 2006-05-10 18:29:38.000000000 +0000 @@ -994,6 +994,12 @@ AC_DEFINE_UNQUOTED(OMPI_WANT_LIBLTDL, $WANT_LIBLTDL, [Whether to include support for libltdl or not]) +############################################################################ +# testing for special case quirks that should be kept isolated from generic +# tests and generic operations +############################################################################ + +AM_CONDITIONAL(OMPI_ASM_PRECOMPILE, test "$OMPI_ASM_PRECOMPILE" = "1") ############################################################################ # final compiler config --- ompi-original/ompi/mca/io/romio/romio/configure.in 2006-05-10 17:31:36.000000000 +0000 +++ patched/ompi/mca/io/romio/romio/configure.in 2006-05-10 19:08:26.000000000 +0000 @@ -540,6 +540,10 @@ MPI=mpich mpi_mpich=1 fi + cputype=`uname -m` + if test $cputype = "mips64" ; then + MIPS=3 + fi fi # if test -n "$arch_SX4" ; then --- ompi-original/config/ompi_config_asm.m4 2006-05-10 17:32:01.000000000 +0000 +++ patched/config/ompi_config_asm.m4 2006-05-10 21:29:02.000000000 +0000 @@ -822,12 +822,16 @@ OMPI_GCC_INLINE_ASSIGN='"bis zero,zero,%0" : 
"=&r"(ret)' ;; - mips-*|mips64-*) + mips-*|mips64*) # Should really find some way to make sure that we are on # a MIPS III machine (r4000 and later) ompi_cv_asm_arch="MIPS" OMPI_ASM_SUPPORT_64BIT=1 OMPI_GCC_INLINE_ASSIGN='"or %0,[$]0,[$]0" : "=&r"(ret)' + if test "$host_os" = "linux" ; then + OMPI_ASM_ASM_H="OMPI_ASM_ASM_H" + OMPI_ASM_PRECOMPILE=1 + fi ;; powerpc-*|powerpc64-*)