Hi Gilles,
It is not necessary to have a heterogeneous environment to reproduce
the error as you can see below. All machines are 64 bit.
tyr spawn 119 ompi_info | grep -e "OPAL repo revision" -e "C compiler absolute"
OPAL repo revision: v2.x-dev-1290-gbd0e4e1
C compiler absolute: /usr/local/gcc-5.1.0/bin/gcc
tyr spawn 120 uname -a
SunOS tyr.informatik.hs-fulda.de 5.10 Generic_150400-11 sun4u sparc SUNW,A70 Solaris
tyr spawn 121 mpiexec -np 1 --host tyr,tyr,tyr,tyr spawn_multiple_master
Parent process 0 running on tyr.informatik.hs-fulda.de
I create 3 slave processes.
[tyr.informatik.hs-fulda.de:27286] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c
at line 829
[tyr.informatik.hs-fulda.de:27286] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c
at line 2176
[tyr:27288] *** An error occurred in MPI_Comm_spawn_multiple
[tyr:27288] *** reported by process [3434086401,0]
[tyr:27288] *** on communicator MPI_COMM_WORLD
[tyr:27288] *** MPI_ERR_SPAWN: could not spawn processes
[tyr:27288] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[tyr:27288] *** and potentially your MPI job)
tyr spawn 122
sunpc1 fd1026 105 ompi_info | grep -e "OPAL repo revision" -e "C compiler absolute"
OPAL repo revision: v2.x-dev-1290-gbd0e4e1
C compiler absolute: /usr/local/gcc-5.1.0/bin/gcc
sunpc1 fd1026 106 uname -a
SunOS sunpc1 5.10 Generic_147441-21 i86pc i386 i86pc Solaris
sunpc1 fd1026 107 mpiexec -np 1 --host sunpc1,sunpc1,sunpc1,sunpc1 spawn_multiple_master
Parent process 0 running on sunpc1
I create 3 slave processes.
[sunpc1:00368] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c
at line 829
[sunpc1:00368] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c
at line 2176
[sunpc1:370] *** An error occurred in MPI_Comm_spawn_multiple
[sunpc1:370] *** reported by process [43909121,0]
[sunpc1:370] *** on communicator MPI_COMM_WORLD
[sunpc1:370] *** MPI_ERR_SPAWN: could not spawn processes
[sunpc1:370] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[sunpc1:370] *** and potentially your MPI job)
sunpc1 fd1026 108
linpc1 fd1026 105 ompi_info | grep -e "OPAL repo revision" -e "C compiler absolute"
OPAL repo revision: v2.x-dev-1290-gbd0e4e1
C compiler absolute: /usr/local/gcc-5.1.0/bin/gcc
linpc1 fd1026 106 uname -a
Linux linpc1 3.1.10-1.29-desktop #1 SMP PREEMPT Fri May 31 20:10:04 UTC 2013 (2529847) x86_64 x86_64 x86_64 GNU/Linux
linpc1 fd1026 107 mpiexec -np 1 --host linpc1,linpc1,linpc1,linpc1 spawn_multiple_master
Parent process 0 running on linpc1
I create 3 slave processes.
[linpc1:21502] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c
at line 829
[linpc1:21502] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c
at line 2176
[linpc1:21507] *** An error occurred in MPI_Comm_spawn_multiple
[linpc1:21507] *** reported by process [1005518849,0]
[linpc1:21507] *** on communicator MPI_COMM_WORLD
[linpc1:21507] *** MPI_ERR_SPAWN: could not spawn processes
[linpc1:21507] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[linpc1:21507] *** and potentially your MPI job)
linpc1 fd1026 108
I used the following configure command.
../openmpi-v2.x-dev-1290-gbd0e4e1/configure \
--prefix=/usr/local/openmpi-2.0.0_64_gcc \
--libdir=/usr/local/openmpi-2.0.0_64_gcc/lib64 \
--with-jdk-bindir=/usr/local/jdk1.8.0/bin \
--with-jdk-headers=/usr/local/jdk1.8.0/include \
JAVA_HOME=/usr/local/jdk1.8.0 \
LDFLAGS="-m64" CC="gcc" CXX="g++" FC="gfortran" \
CFLAGS="-m64" CXXFLAGS="-m64" FCFLAGS="-m64" \
CPP="cpp" CXXCPP="cpp" \
--enable-mpi-cxx \
--enable-cxx-exceptions \
--enable-mpi-java \
--enable-heterogeneous \
--enable-mpi-thread-multiple \
--with-hwloc=internal \
--without-verbs \
--with-wrapper-cflags="-std=c11 -m64" \
--with-wrapper-cxxflags="-m64" \
--with-wrapper-fcflags="-m64" \
--enable-debug \
|& tee log.configure.$SYSTEM_ENV.$MACHINE_ENV.64_gcc
Kind regards
Siegmar
On 27.04.2016 at 13:21, Gilles Gouaillardet wrote:
Siegmar,
please add this to your CFLAGS for the time being.
configure tries to detect which flags must be added for C99 support, and it seems
the test is not working for Solaris 10 and Oracle compilers.
This is no longer a widely used environment, and I am not sure I can find the
time to fix this in the near future.
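(With the gcc configure command shown earlier in this message, that would amount
to something like CFLAGS="-m64 -std=c99" instead of CFLAGS="-m64".)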
Regarding the runtime issue, can you please describe your 4 hosts (os, endianness and bitness)?
Cheers,
Gilles
On Wednesday, April 27, 2016, Siegmar Gross
<siegmar.gr...@informatik.hs-fulda.de> wrote:
Hi Gilles,
adding "-std=c99" to CFLAGS solves the problem with the missing library.
Shall I add it permanently to my configure command or will you add it,
so that I will not run into problems if you need the C11 standard later?
"spawn_multiple_master" breaks with the same error that I reported
yesterday for my gcc-version of Open MPI. Hopefully you can solve the
problem as well.
Kind regards and thank you very much for your help
Siegmar
On 27.04.2016 at 08:05, Gilles Gouaillardet wrote:
Siegmar,
here is the error :
configure:17969: cc -o conftest -m64 -D_REENTRANT -g -g
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1-SunOS.sparc.64_cc
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1/opal/include
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1-SunOS.sparc.64_cc/opal/include
-D_REENTRANT
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/hwloc/hwloc1112/hwloc/include
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1-SunOS.sparc.64_cc/opal/mca/hwloc/hwloc1112/hwloc/include
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/event/libevent2022/libevent
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/event/libevent2022/libevent/include
-I/export2/src/openmpi-2.0.0/openmpi-v2.x-dev-1290-gbd0e4e1-SunOS.sparc.64_cc/opal/mca/event/libevent2022/libevent/include
-m64 conftest.c >&5
"/usr/include/stdbool.h", line 42: #error: "Use of <stdbool.h> is
valid only
in a c99 compilation environment."
I cannot reproduce this on Solaris 11 with the Oracle Studio 5.3 compiler, and I
do not have Solaris 10 yet.
Could you please re-configure with '-std=c99' appended to your CFLAGS and see
if it helps?
Cheers,
Gilles
On 4/26/2016 7:57 PM, Siegmar Gross wrote:
Hi Gilles and Ralph,
I was able to sort out my mess. In my last email I compared the
files from "SunOS_sparc/openmpi-2.0.0_64_gcc/lib64/openmpi" from
the attachment of my email to Ralph with the files from
"SunOS_sparc/openmpi-2.0.0_64_cc/lib64/openmpi" from my current
file system. That's the reason why I had different
timestamps. The other problem was that Ralph didn't recognize
that "mca_pmix_pmix112.so" wasn't built on Solaris with the
Sun C compiler. I've removed most of the files from the attachment
of my email so that it is easier to see the relevant files. Below
I try to give you more information that may be relevant to track
down the problem. I still get an error running one of my small
test programs, when I use my gcc-version of Open MPI.
"mca_pmix_pmix112.so" is a 64 bits library.
Linux_x86_64/openmpi-2.0.0_64_cc/lib64/openmpi:
...
-rwxr-xr-x 1 root root  261327 Apr 19 16:46 mca_plm_slurm.so
-rwxr-xr-x 1 root root    1002 Apr 19 16:45 mca_pmix_pmix112.la
-rwxr-xr-x 1 root root 3906526 Apr 19 16:45 mca_pmix_pmix112.so
-rwxr-xr-x 1 root root     966 Apr 19 16:51 mca_pml_cm.la
-rwxr-xr-x 1 root root 1574265 Apr 19 16:51 mca_pml_cm.so
...
Linux_x86_64/openmpi-2.0.0_64_gcc/lib64/openmpi:
...
-rwxr-xr-x 1 root root   70371 Apr 19 16:43 mca_plm_slurm.so
-rwxr-xr-x 1 root root    1008 Apr 19 16:42 mca_pmix_pmix112.la
-rwxr-xr-x 1 root root 1029005 Apr 19 16:42 mca_pmix_pmix112.so
-rwxr-xr-x 1 root root     972 Apr 19 16:46 mca_pml_cm.la
-rwxr-xr-x 1 root root  284858 Apr 19 16:46 mca_pml_cm.so
...
SunOS_sparc/openmpi-2.0.0_64_cc/lib64/openmpi:
...
-rwxr-xr-x 1 root root  319816 Apr 19 19:58 mca_plm_rsh.so
-rwxr-xr-x 1 root root     970 Apr 19 20:00 mca_pml_cm.la
-rwxr-xr-x 1 root root 1507440 Apr 19 20:00 mca_pml_cm.so
...
SunOS_sparc/openmpi-2.0.0_64_gcc/lib64/openmpi:
...
-rwxr-xr-x 1 root root  153280 Apr 19 19:49 mca_plm_rsh.so
-rwxr-xr-x 1 root root    1007 Apr 19 19:47 mca_pmix_pmix112.la
-rwxr-xr-x 1 root root 1400512 Apr 19 19:47 mca_pmix_pmix112.so
-rwxr-xr-x 1 root root     971 Apr 19 19:52 mca_pml_cm.la
-rwxr-xr-x 1 root root  342440 Apr 19 19:52 mca_pml_cm.so
...
SunOS_x86_64/openmpi-2.0.0_64_cc/lib64/openmpi:
...
-rwxr-xr-x 1 root root  300096 Apr 19 17:18 mca_plm_rsh.so
-rwxr-xr-x 1 root root     970 Apr 19 17:23 mca_pml_cm.la
-rwxr-xr-x 1 root root 1458816 Apr 19 17:23 mca_pml_cm.so
...
SunOS_x86_64/openmpi-2.0.0_64_gcc/lib64/openmpi:
...
-rwxr-xr-x 1 root root  133096 Apr 19 17:42 mca_plm_rsh.so
-rwxr-xr-x 1 root root    1007 Apr 19 17:41 mca_pmix_pmix112.la
-rwxr-xr-x 1 root root 1320240 Apr 19 17:41 mca_pmix_pmix112.so
-rwxr-xr-x 1 root root     971 Apr 19 17:46 mca_pml_cm.la
-rwxr-xr-x 1 root root  419848 Apr 19 17:46 mca_pml_cm.so
...
Yesterday I've installed openmpi-v2.x-dev-1290-gbd0e4e1 so that we
have a current version for the investigation of the problem. Once
more mca_pmix_pmix112.so isn't available on Solaris if I use the
Sun C compiler.
"config.log" for gcc-5.1.0 shows the following.
...
configure:127799: /bin/bash
'../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/
pmix/configure' succeeded for opal/mca/pmix/pmix112/pmix
configure:127916: checking if MCA component pmix:pmix112 can compile
configure:127918: result: yes
configure:5637: --- MCA component pmix:external (m4 configuration macro)
configure:128523: checking for MCA component pmix:external compile mode
configure:128529: result: dso
configure:129054: checking if MCA component pmix:external can compile
configure:129056: result: no
...
config.status:3897: creating opal/mca/pmix/Makefile
config.status:3897: creating opal/mca/pmix/s1/Makefile
config.status:3897: creating opal/mca/pmix/cray/Makefile
config.status:3897: creating opal/mca/pmix/s2/Makefile
config.status:3897: creating opal/mca/pmix/pmix112/Makefile
config.status:3897: creating opal/mca/pmix/external/Makefile
...
MCA_BUILD_opal_pmix_cray_DSO_FALSE='#'
MCA_BUILD_opal_pmix_cray_DSO_TRUE=''
MCA_BUILD_opal_pmix_external_DSO_FALSE='#'
MCA_BUILD_opal_pmix_external_DSO_TRUE=''
MCA_BUILD_opal_pmix_pmix112_DSO_FALSE='#'
MCA_BUILD_opal_pmix_pmix112_DSO_TRUE=''
MCA_BUILD_opal_pmix_s1_DSO_FALSE='#'
MCA_BUILD_opal_pmix_s1_DSO_TRUE=''
MCA_BUILD_opal_pmix_s2_DSO_FALSE='#'
MCA_BUILD_opal_pmix_s2_DSO_TRUE=''
...
MCA_opal_FRAMEWORKS='common allocator backtrace btl dl event hwloc
if
installdirs memchecker memcpy memory mpool pmix pstat rcache sec
shmem timer'
MCA_opal_FRAMEWORKS_SUBDIRS='mca/common mca/allocator
mca/backtrace mca/btl
mca/dl mca/event mca/hwloc mca/if mca/installdirs mca/memchecker
mca/memcpy
mca/memory mca/mpool mca/pmix mca/pstat mca/rcache mca/sec
mca/shmem mca/timer'
MCA_opal_FRAMEWORK_COMPONENT_ALL_SUBDIRS='$(MCA_opal_common_ALL_SUBDIRS)
$(MCA_opal_allocator_ALL_SUBDIRS) $(MCA_opal_backtrace_ALL_SUBDIRS)
$(MCA_opal_btl_ALL_SUBDIRS) $(MCA_opal_dl_ALL_SUBDIRS)
$(MCA_opal_event_ALL_SUBDIRS) $(MCA_opal_hwloc_ALL_SUBDIRS)
$(MCA_opal_if_ALL_SUBDIRS) $(MCA_opal_installdirs_ALL_SUBDIRS)
$(MCA_opal_memchecker_ALL_SUBDIRS) $(MCA_opal_memcpy_ALL_SUBDIRS)
$(MCA_opal_memory_ALL_SUBDIRS) $(MCA_opal_mpool_ALL_SUBDIRS)
$(MCA_opal_pmix_ALL_SUBDIRS) $(MCA_opal_pstat_ALL_SUBDIRS)
$(MCA_opal_rcache_ALL_SUBDIRS) $(MCA_opal_sec_ALL_SUBDIRS)
$(MCA_opal_shmem_ALL_SUBDIRS) $(MCA_opal_timer_ALL_SUBDIRS)'
MCA_opal_FRAMEWORK_COMPONENT_DSO_SUBDIRS='$(MCA_opal_common_DSO_SUBDIRS)
$(MCA_opal_allocator_DSO_SUBDIRS) $(MCA_opal_backtrace_DSO_SUBDIRS)
$(MCA_opal_btl_DSO_SUBDIRS) $(MCA_opal_dl_DSO_SUBDIRS)
$(MCA_opal_event_DSO_SUBDIRS) $(MCA_opal_hwloc_DSO_SUBDIRS)
$(MCA_opal_if_DSO_SUBDIRS) $(MCA_opal_installdirs_DSO_SUBDIRS)
$(MCA_opal_memchecker_DSO_SUBDIRS) $(MCA_opal_memcpy_DSO_SUBDIRS)
$(MCA_opal_memory_DSO_SUBDIRS) $(MCA_opal_mpool_DSO_SUBDIRS)
$(MCA_opal_pmix_DSO_SUBDIRS) $(MCA_opal_pstat_DSO_SUBDIRS)
$(MCA_opal_rcache_DSO_SUBDIRS) $(MCA_opal_sec_DSO_SUBDIRS)
$(MCA_opal_shmem_DSO_SUBDIRS) $(MCA_opal_timer_DSO_SUBDIRS)'
MCA_opal_FRAMEWORK_COMPONENT_STATIC_SUBDIRS='$(MCA_opal_common_STATIC_SUBDIRS)
$(MCA_opal_allocator_STATIC_SUBDIRS)
$(MCA_opal_backtrace_STATIC_SUBDIRS)
$(MCA_opal_btl_STATIC_SUBDIRS) $(MCA_opal_dl_STATIC_SUBDIRS)
$(MCA_opal_event_STATIC_SUBDIRS) $(MCA_opal_hwloc_STATIC_SUBDIRS)
$(MCA_opal_if_STATIC_SUBDIRS) $(MCA_opal_installdirs_STATIC_SUBDIRS)
$(MCA_opal_memchecker_STATIC_SUBDIRS)
$(MCA_opal_memcpy_STATIC_SUBDIRS)
$(MCA_opal_memory_STATIC_SUBDIRS) $(MCA_opal_mpool_STATIC_SUBDIRS)
$(MCA_opal_pmix_STATIC_SUBDIRS) $(MCA_opal_pstat_STATIC_SUBDIRS)
$(MCA_opal_rcache_STATIC_SUBDIRS) $(MCA_opal_sec_STATIC_SUBDIRS)
$(MCA_opal_shmem_STATIC_SUBDIRS) $(MCA_opal_timer_STATIC_SUBDIRS)'
MCA_opal_FRAMEWORK_LIBS=' $(MCA_opal_common_STATIC_LTLIBS)
mca/allocator/libmca_allocator.la $(MCA_opal_allocator_STATIC_LTLIBS)
mca/backtrace/libmca_backtrace.la $(MCA_opal_backtrace_STATIC_LTLIBS)
mca/btl/libmca_btl.la $(MCA_opal_btl_STATIC_LTLIBS)
mca/dl/libmca_dl.la $(MCA_opal_dl_STATIC_LTLIBS)
mca/event/libmca_event.la $(MCA_opal_event_STATIC_LTLIBS)
mca/hwloc/libmca_hwloc.la $(MCA_opal_hwloc_STATIC_LTLIBS)
mca/if/libmca_if.la $(MCA_opal_if_STATIC_LTLIBS)
mca/installdirs/libmca_installdirs.la $(MCA_opal_installdirs_STATIC_LTLIBS)
mca/memchecker/libmca_memchecker.la $(MCA_opal_memchecker_STATIC_LTLIBS)
mca/memcpy/libmca_memcpy.la $(MCA_opal_memcpy_STATIC_LTLIBS)
mca/memory/libmca_memory.la $(MCA_opal_memory_STATIC_LTLIBS)
mca/mpool/libmca_mpool.la $(MCA_opal_mpool_STATIC_LTLIBS)
mca/pmix/libmca_pmix.la $(MCA_opal_pmix_STATIC_LTLIBS)
mca/pstat/libmca_pstat.la $(MCA_opal_pstat_STATIC_LTLIBS)
mca/rcache/libmca_rcache.la $(MCA_opal_rcache_STATIC_LTLIBS)
mca/sec/libmca_sec.la $(MCA_opal_sec_STATIC_LTLIBS)
mca/shmem/libmca_shmem.la $(MCA_opal_shmem_STATIC_LTLIBS)
mca/timer/libmca_timer.la $(MCA_opal_timer_STATIC_LTLIBS)'
...
MCA_opal_pmix_ALL_COMPONENTS=' s1 cray s2 pmix112 external'
MCA_opal_pmix_ALL_SUBDIRS=' mca/pmix/s1 mca/pmix/cray mca/pmix/s2
mca/pmix/pmix112 mca/pmix/external'
MCA_opal_pmix_DSO_COMPONENTS=' pmix112'
MCA_opal_pmix_DSO_SUBDIRS=' mca/pmix/pmix112'
MCA_opal_pmix_STATIC_COMPONENTS=''
MCA_opal_pmix_STATIC_LTLIBS=''
MCA_opal_pmix_STATIC_SUBDIRS=''
...
opal_pmix_ext_CPPFLAGS=''
opal_pmix_ext_LDFLAGS=''
opal_pmix_ext_LIBS=''
opal_pmix_pmix112_CPPFLAGS='-I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix/include/pmix
-I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix/include
-I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix
-I$(OPAL_TOP_SRCDIR)/opal/mca/pmix/pmix112/pmix'
opal_pmix_pmix112_LIBS='$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix112/pmix/libpmix.la'
...
"config.log" for Sun C 5.13 shows the following.
...
configure:127803: /bin/bash
'../../../../../../openmpi-v2.x-dev-1290-gbd0e4e1/opal/mca/pmix/pmix112/
pmix/configure' *failed* for opal/mca/pmix/pmix112/pmix
configure:128379: checking if MCA component pmix:pmix112 can compile
configure:128381: result: no
configure:5637: --- MCA component pmix:external (m4 configuration macro)
configure:128523: checking for MCA component pmix:external compile mode
configure:128529: result: dso
configure:129054: checking if MCA component pmix:external can compile
configure:129056: result: no
...
config.status:3887: creating opal/mca/pmix/Makefile
config.status:3887: creating opal/mca/pmix/s1/Makefile
config.status:3887: creating opal/mca/pmix/cray/Makefile
config.status:3887: creating opal/mca/pmix/s2/Makefile
config.status:3887: creating opal/mca/pmix/pmix112/Makefile
config.status:3887: creating opal/mca/pmix/external/Makefile
...
MCA_BUILD_opal_pmix_cray_DSO_FALSE='#'
MCA_BUILD_opal_pmix_cray_DSO_TRUE=''
MCA_BUILD_opal_pmix_external_DSO_FALSE='#'
MCA_BUILD_opal_pmix_external_DSO_TRUE=''
MCA_BUILD_opal_pmix_pmix112_DSO_FALSE='#'
MCA_BUILD_opal_pmix_pmix112_DSO_TRUE=''
MCA_BUILD_opal_pmix_s1_DSO_FALSE='#'
MCA_BUILD_opal_pmix_s1_DSO_TRUE=''
MCA_BUILD_opal_pmix_s2_DSO_FALSE='#'
MCA_BUILD_opal_pmix_s2_DSO_TRUE=''
...
MCA_opal_FRAMEWORKS='common allocator backtrace btl dl event hwloc
if
installdirs memchecker memcpy memory mpool pmix pstat rcache sec
shmem timer'
MCA_opal_FRAMEWORKS_SUBDIRS='mca/common mca/allocator
mca/backtrace mca/btl
mca/dl mca/event mca/hwloc mca/if mca/installdirs mca/memchecker
mca/memcpy
mca/memory mca/mpool mca/pmix mca/pstat mca/rcache mca/sec
mca/shmem mca/timer'
MCA_opal_FRAMEWORK_COMPONENT_ALL_SUBDIRS='$(MCA_opal_common_ALL_SUBDIRS)
$(MCA_opal_allocator_ALL_SUBDIRS) $(MCA_opal_backtrace_ALL_SUBDIRS)
$(MCA_opal_btl_ALL_SUBDIRS) $(MCA_opal_dl_ALL_SUBDIRS)
$(MCA_opal_event_ALL_SUBDIRS) $(MCA_opal_hwloc_ALL_SUBDIRS)
$(MCA_opal_if_ALL_SUBDIRS) $(MCA_opal_installdirs_ALL_SUBDIRS)
$(MCA_opal_memchecker_ALL_SUBDIRS) $(MCA_opal_memcpy_ALL_SUBDIRS)
$(MCA_opal_memory_ALL_SUBDIRS) $(MCA_opal_mpool_ALL_SUBDIRS)
$(MCA_opal_pmix_ALL_SUBDIRS) $(MCA_opal_pstat_ALL_SUBDIRS)
$(MCA_opal_rcache_ALL_SUBDIRS) $(MCA_opal_sec_ALL_SUBDIRS)
$(MCA_opal_shmem_ALL_SUBDIRS) $(MCA_opal_timer_ALL_SUBDIRS)'
MCA_opal_FRAMEWORK_COMPONENT_DSO_SUBDIRS='$(MCA_opal_common_DSO_SUBDIRS)
$(MCA_opal_allocator_DSO_SUBDIRS) $(MCA_opal_backtrace_DSO_SUBDIRS)
$(MCA_opal_btl_DSO_SUBDIRS) $(MCA_opal_dl_DSO_SUBDIRS)
$(MCA_opal_event_DSO_SUBDIRS) $(MCA_opal_hwloc_DSO_SUBDIRS)
$(MCA_opal_if_DSO_SUBDIRS) $(MCA_opal_installdirs_DSO_SUBDIRS)
$(MCA_opal_memchecker_DSO_SUBDIRS) $(MCA_opal_memcpy_DSO_SUBDIRS)
$(MCA_opal_memory_DSO_SUBDIRS) $(MCA_opal_mpool_DSO_SUBDIRS)
$(MCA_opal_pmix_DSO_SUBDIRS) $(MCA_opal_pstat_DSO_SUBDIRS)
$(MCA_opal_rcache_DSO_SUBDIRS) $(MCA_opal_sec_DSO_SUBDIRS)
$(MCA_opal_shmem_DSO_SUBDIRS) $(MCA_opal_timer_DSO_SUBDIRS)'
MCA_opal_FRAMEWORK_COMPONENT_STATIC_SUBDIRS='$(MCA_opal_common_STATIC_SUBDIRS)
$(MCA_opal_allocator_STATIC_SUBDIRS)
$(MCA_opal_backtrace_STATIC_SUBDIRS)
$(MCA_opal_btl_STATIC_SUBDIRS) $(MCA_opal_dl_STATIC_SUBDIRS)
$(MCA_opal_event_STATIC_SUBDIRS) $(MCA_opal_hwloc_STATIC_SUBDIRS)
$(MCA_opal_if_STATIC_SUBDIRS) $(MCA_opal_installdirs_STATIC_SUBDIRS)
$(MCA_opal_memchecker_STATIC_SUBDIRS)
$(MCA_opal_memcpy_STATIC_SUBDIRS)
$(MCA_opal_memory_STATIC_SUBDIRS) $(MCA_opal_mpool_STATIC_SUBDIRS)
$(MCA_opal_pmix_STATIC_SUBDIRS) $(MCA_opal_pstat_STATIC_SUBDIRS)
$(MCA_opal_rcache_STATIC_SUBDIRS) $(MCA_opal_sec_STATIC_SUBDIRS)
$(MCA_opal_shmem_STATIC_SUBDIRS) $(MCA_opal_timer_STATIC_SUBDIRS)'
MCA_opal_FRAMEWORK_LIBS=' $(MCA_opal_common_STATIC_LTLIBS)
mca/allocator/libmca_allocator.la $(MCA_opal_allocator_STATIC_LTLIBS)
mca/backtrace/libmca_backtrace.la $(MCA_opal_backtrace_STATIC_LTLIBS)
mca/btl/libmca_btl.la $(MCA_opal_btl_STATIC_LTLIBS)
mca/dl/libmca_dl.la $(MCA_opal_dl_STATIC_LTLIBS)
mca/event/libmca_event.la $(MCA_opal_event_STATIC_LTLIBS)
mca/hwloc/libmca_hwloc.la $(MCA_opal_hwloc_STATIC_LTLIBS)
mca/if/libmca_if.la $(MCA_opal_if_STATIC_LTLIBS)
mca/installdirs/libmca_installdirs.la $(MCA_opal_installdirs_STATIC_LTLIBS)
mca/memchecker/libmca_memchecker.la $(MCA_opal_memchecker_STATIC_LTLIBS)
mca/memcpy/libmca_memcpy.la $(MCA_opal_memcpy_STATIC_LTLIBS)
mca/memory/libmca_memory.la $(MCA_opal_memory_STATIC_LTLIBS)
mca/mpool/libmca_mpool.la $(MCA_opal_mpool_STATIC_LTLIBS)
mca/pmix/libmca_pmix.la $(MCA_opal_pmix_STATIC_LTLIBS)
mca/pstat/libmca_pstat.la $(MCA_opal_pstat_STATIC_LTLIBS)
mca/rcache/libmca_rcache.la $(MCA_opal_rcache_STATIC_LTLIBS)
mca/sec/libmca_sec.la $(MCA_opal_sec_STATIC_LTLIBS)
mca/shmem/libmca_shmem.la $(MCA_opal_shmem_STATIC_LTLIBS)
mca/timer/libmca_timer.la $(MCA_opal_timer_STATIC_LTLIBS)'
...
MCA_opal_pmix_ALL_COMPONENTS=' s1 cray s2 pmix112 external'
MCA_opal_pmix_ALL_SUBDIRS=' mca/pmix/s1 mca/pmix/cray mca/pmix/s2
mca/pmix/pmix112 mca/pmix/external'
MCA_opal_pmix_DSO_COMPONENTS=''
MCA_opal_pmix_DSO_SUBDIRS=''
MCA_opal_pmix_STATIC_COMPONENTS=''
MCA_opal_pmix_STATIC_LTLIBS=''
MCA_opal_pmix_STATIC_SUBDIRS=''
...
opal_pmix_ext_CPPFLAGS=''
opal_pmix_ext_LDFLAGS=''
opal_pmix_ext_LIBS=''
opal_pmix_pmix112_CPPFLAGS=''
opal_pmix_pmix112_LIBS=''
...
I've attached the config.log files for pmix.
tyr openmpi-2.0.0 142 tar zvft pmix_config.log.tar.gz
-rw-r--r-- root/root 136291 2016-04-25 08:05:34
openmpi-v2.x-dev-1290-gbd0e4e1-SunOS.sparc.64_cc/opal/mca/pmix/pmix112/pmix/config.log
-rw-r--r-- root/root 528808 2016-04-25 08:07:54
openmpi-v2.x-dev-1290-gbd0e4e1-SunOS.sparc.64_gcc/opal/mca/pmix/pmix112/pmix/config.log
tyr openmpi-2.0.0 143
I've also attached the output for the broken execution of
"spawn_multiple_master" for my gcc-version of Open MPI.
"spawn_master" works as expected with my gcc-version of Open MPI.
Hopefully you can fix the problem.
Kind regards and thank you very much for your help
Siegmar
On 23.04.2016 at 21:34, Siegmar Gross wrote:
Hi Gilles,
I don't know what happened, but the files are not available now and they were
definitely available when I answered the email from Ralph. The files also have
a different timestamp now. This is an extract from my email to Ralph for
Solaris Sparc.
-rwxr-xr-x 1 root root     977 Apr 19 19:49 mca_plm_rsh.la
-rwxr-xr-x 1 root root  153280 Apr 19 19:49 mca_plm_rsh.so
-rwxr-xr-x 1 root root    1007 Apr 19 19:47 mca_pmix_pmix112.la
-rwxr-xr-x 1 root root 1400512 Apr 19 19:47 mca_pmix_pmix112.so
-rwxr-xr-x 1 root root     971 Apr 19 19:52 mca_pml_cm.la
-rwxr-xr-x 1 root root  342440 Apr 19 19:52 mca_pml_cm.so
Now I have the following output for these files.
-rwxr-xr-x 1 root root     976 Apr 19 19:58 mca_plm_rsh.la
-rwxr-xr-x 1 root root  319816 Apr 19 19:58 mca_plm_rsh.so
-rwxr-xr-x 1 root root     970 Apr 19 20:00 mca_pml_cm.la
-rwxr-xr-x 1 root root 1507440 Apr 19 20:00 mca_pml_cm.so
I'll try to find out what happened next week when I'm back in
my office.
Kind regards
Siegmar
On 23.04.16 at 02:12, Gilles Gouaillardet wrote:
Siegmar,
I will try to reproduce this on my solaris11 x86_64 vm
In the meantime, can you please double check that mca_pmix_pmix112.so
is a 64-bit library?
(e.g., confirm "-m64" was correctly passed to pmix)
Cheers,
Gilles
On Friday, April 22, 2016, Siegmar Gross
<siegmar.gr...@informatik.hs-fulda.de> wrote:
Hi Ralph,
I've already used "-enable-debug". "SYSTEM_ENV" is "SunOS" or
"Linux" and "MACHINE_ENV" is "sparc" or "x86_64".
mkdir openmpi-v2.x-dev-1280-gc110ae8-${SYSTEM_ENV}.${MACHINE_ENV}.64_gcc
cd openmpi-v2.x-dev-1280-gc110ae8-${SYSTEM_ENV}.${MACHINE_ENV}.64_gcc
../openmpi-v2.x-dev-1280-gc110ae8/configure \
--prefix=/usr/local/openmpi-2.0.0_64_gcc \
--libdir=/usr/local/openmpi-2.0.0_64_gcc/lib64 \
--with-jdk-bindir=/usr/local/jdk1.8.0/bin \
--with-jdk-headers=/usr/local/jdk1.8.0/include \
JAVA_HOME=/usr/local/jdk1.8.0 \
LDFLAGS="-m64" CC="gcc" CXX="g++" FC="gfortran" \
CFLAGS="-m64" CXXFLAGS="-m64" FCFLAGS="-m64" \
CPP="cpp" CXXCPP="cpp" \
--enable-mpi-cxx \
--enable-cxx-exceptions \
--enable-mpi-java \
--enable-heterogeneous \
--enable-mpi-thread-multiple \
--with-hwloc=internal \
--without-verbs \
--with-wrapper-cflags="-std=c11 -m64" \
--with-wrapper-cxxflags="-m64" \
--with-wrapper-fcflags="-m64" \
--enable-debug \
|& tee log.configure.$SYSTEM_ENV.$MACHINE_ENV.64_gcc
mkdir openmpi-v2.x-dev-1280-gc110ae8-${SYSTEM_ENV}.${MACHINE_ENV}.64_cc
cd openmpi-v2.x-dev-1280-gc110ae8-${SYSTEM_ENV}.${MACHINE_ENV}.64_cc
../openmpi-v2.x-dev-1280-gc110ae8/configure \
--prefix=/usr/local/openmpi-2.0.0_64_cc \
--libdir=/usr/local/openmpi-2.0.0_64_cc/lib64 \
--with-jdk-bindir=/usr/local/jdk1.8.0/bin \
--with-jdk-headers=/usr/local/jdk1.8.0/include \
JAVA_HOME=/usr/local/jdk1.8.0 \
LDFLAGS="-m64" CC="cc" CXX="CC" FC="f95" \
CFLAGS="-m64" CXXFLAGS="-m64 -library=stlport4"
FCFLAGS="-m64" \
CPP="cpp" CXXCPP="cpp" \
--enable-mpi-cxx \
--enable-cxx-exceptions \
--enable-mpi-java \
--enable-heterogeneous \
--enable-mpi-thread-multiple \
--with-hwloc=internal \
--without-verbs \
--with-wrapper-cflags="-m64" \
--with-wrapper-cxxflags="-m64 -library=stlport4" \
--with-wrapper-fcflags="-m64" \
--with-wrapper-ldflags="" \
--enable-debug \
|& tee log.configure.$SYSTEM_ENV.$MACHINE_ENV.64_cc
Kind regards
Siegmar
On 21.04.2016 at 18:18, Ralph Castain wrote:
Can you please rebuild OMPI with -enable-debug in the configure cmd? It will
let us see more error output.
On Apr 21, 2016, at 8:52 AM, Siegmar Gross
<siegmar.gr...@informatik.hs-fulda.de> wrote:
Hi Ralph,
I don't see any additional information.
tyr hello_1 108 mpiexec -np 4 --host tyr,sunpc1,linpc1,ruester -mca mca_base_component_show_load_errors 1 hello_1_mpi
[tyr.informatik.hs-fulda.de:06211] [[48741,0],0] ORTE_ERROR_LOG: Not found in file
../../../../../openmpi-v2.x-dev-1280-gc110ae8/orte/mca/ess/hnp/ess_hnp_module.c
at line 638
--------------------------------------------------------------------------
It looks like orte_init failed for some reason; your parallel process is
likely to abort. There are many reasons that a parallel process can
fail during orte_init; some of which are due to configuration or
environment problems. This failure appears to be an internal failure;
here's some additional information (which may only be relevant to an
Open MPI developer):
opal_pmix_base_select failed
--> Returned value Not found (-13) instead of ORTE_SUCCESS
--------------------------------------------------------------------------
tyr hello_1 109 mpiexec -np 4 --host tyr,sunpc1,linpc1,ruester -mca mca_base_component_show_load_errors 1 -mca pmix_base_verbose 10 -mca pmix_server_verbose 5 hello_1_mpi
[tyr.informatik.hs-fulda.de:06212] mca: base: components_register: registering framework pmix components
[tyr.informatik.hs-fulda.de:06212] mca: base: components_open: opening pmix components
[tyr.informatik.hs-fulda.de:06212] mca:base:select: Auto-selecting pmix components
[tyr.informatik.hs-fulda.de:06212] mca:base:select:( pmix) No component selected!
[tyr.informatik.hs-fulda.de:06212] [[48738,0],0] ORTE_ERROR_LOG: Not found in file
../../../../../openmpi-v2.x-dev-1280-gc110ae8/orte/mca/ess/hnp/ess_hnp_module.c
at line 638
--------------------------------------------------------------------------
It looks like orte_init failed for some reason; your parallel process is
likely to abort. There are many reasons that a parallel process can
fail during orte_init; some of which are due to configuration or
environment problems. This failure appears to be an internal failure;
here's some additional information (which may only be relevant to an
Open MPI developer):
opal_pmix_base_select failed
--> Returned value Not found (-13) instead of ORTE_SUCCESS
--------------------------------------------------------------------------
tyr hello_1 110
Kind regards
Siegmar
On 21.04.2016 at 17:24, Ralph Castain wrote:
Hmmm…it looks like you built the right components, but they are not being
picked up. Can you run your mpiexec command again, adding
“-mca mca_base_component_show_load_errors 1” to the cmd line?
On Apr 21, 2016, at 8:16 AM, Siegmar Gross
<siegmar.gr...@informatik.hs-fulda.de> wrote:
Hi Ralph,
I have attached ompi_info output for both compilers from my sparc machine and
the listings for both compilers from the <prefix>/lib/openmpi directories.
Hopefully that helps to find the problem.
hermes tmp 3 tar zvft openmpi-2.x_info.tar.gz
-rw-r--r-- root/root  10969 2016-04-21 17:06 ompi_info_SunOS_sparc_cc.txt
-rw-r--r-- root/root  11044 2016-04-21 17:06 ompi_info_SunOS_sparc_gcc.txt
-rw-r--r-- root/root  71252 2016-04-21 17:02 lib64_openmpi.txt
hermes tmp 4
Kind regards and thank you very much once more for your help
Siegmar
On 21.04.2016 at 15:54, Ralph Castain wrote:
Odd - it would appear that none of the pmix components built? Can you send
along the output from ompi_info? Or just send a listing of the files in the
<prefix>/lib/openmpi directory?
On Apr 21, 2016, at 1:27 AM, Siegmar Gross
<siegmar.gr...@informatik.hs-fulda.de> wrote:
Hi Ralph,
On 21.04.2016 at 00:18, Ralph Castain wrote:
Could you please rerun these tests and add “-mca pmix_base_verbose 10
-mca pmix_server_verbose 5” to your cmd line? I need to see why the
pmix components failed.
tyr spawn 111 mpiexec -np 1 --host tyr,sunpc1,linpc1,ruester -mca pmix_base_verbose 10 -mca pmix_server_verbose 5 spawn_multiple_master
[tyr.informatik.hs-fulda.de:26652] mca: base: components_register: registering framework pmix components
[tyr.informatik.hs-fulda.de:26652] mca: base: components_open: opening pmix components
[tyr.informatik.hs-fulda.de:26652] mca:base:select: Auto-selecting pmix components
[tyr.informatik.hs-fulda.de:26652] mca:base:select:( pmix) No component selected!
[tyr.informatik.hs-fulda.de:26652] [[52794,0],0] ORTE_ERROR_LOG: Not found in file
../../../../../openmpi-v2.x-dev-1280-gc110ae8/orte/mca/ess/hnp/ess_hnp_module.c
at line 638
--------------------------------------------------------------------------
It looks like orte_init failed for some reason; your parallel process is
likely to abort. There are many reasons that a parallel process can
fail during orte_init; some of which are due to configuration or
environment problems. This failure appears to be an internal failure;
here's some additional information (which may only be relevant to an
Open MPI developer):
opal_pmix_base_select failed
--> Returned value Not found (-13) instead of ORTE_SUCCESS
--------------------------------------------------------------------------
tyr spawn 112
tyr hello_1 116 mpiexec -np 1 --host tyr,sunpc1,linpc1,ruester -mca pmix_base_verbose 10 -mca pmix_server_verbose 5 hello_1_mpi
[tyr.informatik.hs-fulda.de:27261] mca: base: components_register: registering framework pmix components
[tyr.informatik.hs-fulda.de:27261] mca: base: components_open: opening pmix components
[tyr.informatik.hs-fulda.de:27261] mca:base:select: Auto-selecting pmix components
[tyr.informatik.hs-fulda.de:27261] mca:base:select:( pmix) No component selected!
[tyr.informatik.hs-fulda.de:27261] [[52315,0],0] ORTE_ERROR_LOG: Not found in file
../../../../../openmpi-v2.x-dev-1280-gc110ae8/orte/mca/ess/hnp/ess_hnp_module.c
at line 638
--------------------------------------------------------------------------
It looks like orte_init failed for some reason; your parallel process is
likely to abort. There are many reasons that a parallel process can
fail during orte_init; some of which are due to configuration or
environment problems. This failure appears to be an internal failure;
here's some additional information (which may only be relevant to an
Open MPI developer):
opal_pmix_base_select failed
--> Returned value Not found (-13) instead of ORTE_SUCCESS
--------------------------------------------------------------------------
tyr hello_1 117
Thank you very much for your help.
Kind regards
Siegmar
Thanks
Ralph
On Apr 20, 2016, at 10:12 AM, Siegmar Gross
<siegmar.gr...@informatik.hs-fulda.de> wrote:
Hi,
I have built openmpi-v2.x-dev-1280-gc110ae8 on my machines (Solaris 10 Sparc,
Solaris 10 x86_64, and openSUSE Linux 12.1 x86_64) with gcc-5.1.0 and Sun C
5.13. Unfortunately I get runtime errors for some programs.
Sun C 5.13:
===========
For all my test programs I get the same error on Solaris Sparc and Solaris
x86_64, while the programs work fine on Linux.
tyr hello_1 115 mpiexec -np 2 hello_1_mpi
[tyr.informatik.hs-fulda.de:22373] [[61763,0],0] ORTE_ERROR_LOG: Not found in file
../../../../../openmpi-v2.x-dev-1280-gc110ae8/orte/mca/ess/hnp/ess_hnp_module.c
at line 638
--------------------------------------------------------------------------
It looks like orte_init failed for some reason; your parallel process is
likely to abort. There are many reasons that a parallel process can
fail during orte_init; some of which are due to configuration or
environment problems. This failure appears to be an internal failure;
here's some additional information (which may only be relevant to an
Open MPI developer):
opal_pmix_base_select failed
--> Returned value Not found (-13) instead of ORTE_SUCCESS
--------------------------------------------------------------------------
tyr hello_1 116
GCC-5.1.0:
==========
tyr spawn 121 mpiexec -np 1 --host tyr,sunpc1,linpc1,ruester spawn_multiple_master
Parent process 0 running on tyr.informatik.hs-fulda.de
I create 3 slave processes.
[tyr.informatik.hs-fulda.de:25366] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1280-gc110ae8/opal/mca/pmix/pmix112/pmix/src/server/pmix_server_ops.c
at line 829
[tyr.informatik.hs-fulda.de:25366] PMIX ERROR: UNPACK-PAST-END in file
../../../../../../openmpi-v2.x-dev-1280-gc110ae8/opal/mca/pmix/pmix112/pmix/src/server/pmix_server.c
at line 2176
[tyr:25377] *** An error occurred in MPI_Comm_spawn_multiple
[tyr:25377] *** reported by process [3308257281,0]
[tyr:25377] *** on communicator MPI_COMM_WORLD
[tyr:25377] *** MPI_ERR_SPAWN: could not spawn processes
[tyr:25377] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[tyr:25377] *** and potentially your MPI job)
tyr spawn 122
I would be grateful if somebody can fix the problems. Thank you very much for
any help in advance.
Kind regards
Siegmar
/* The program demonstrates how to spawn some dynamic MPI processes.
* This version uses one master process which creates two types of
* slave processes with different argument vectors. The argument
* vector contains the parameters passed to the program. Basically it
* corresponds to a normal argument vector for C programs. The main
* difference is that p_argv[0] contains the first parameter and not
* the name of the program. The function which you will use to spawn
* processes will build a normal argument vector consisting of the
* program name followed by the parameters in "p_argv".
*
* A process or a group of processes can create another group of
* processes with "MPI_Comm_spawn ()" or "MPI_Comm_spawn_multiple ()".
* In general it is best (better performance) to start all processes
* statically with "mpiexec" via the command line. If you want to use
* dynamic processes you will normally have one master process which
* starts a lot of slave processes. In some cases it may be useful to
* enlarge a group of processes, e.g., if the MPI universe provides
* more virtual cpu's than the current number of processes and the
* program may benefit from additional processes. You will use
* "MPI_Comm_spwan_multiple ()" if you must start different
* programs or if you want to start the same program with different
* parameters.
*
* There are some reasons to prefer "MPI_Comm_spawn_multiple ()"
* instead of calling "MPI_Comm_spawn ()" multiple times. If you
* spawn new (child) processes they start up like any MPI application,
* i.e., they call "MPI_Init ()" and can use the communicator
* MPI_COMM_WORLD afterwards. This communicator contains only the
* child processes which have been created with the same call of
* "MPI_Comm_spawn ()" and which is distinct from MPI_COMM_WORLD
* of the parent process or processes created in other calls of
* "MPI_Comm_spawn ()". The natural communication mechanism between
* the groups of parent and child processes is via an
* inter-communicator which will be returned from the above
* MPI functions to spawn new processes. The local group of the
* inter-communicator contains the parent processes and the remote
* group contains the child processes. The child processes can get
* the same inter-communicator calling "MPI_Comm_get_parent ()".
* Now it is obvious that calling "MPI_Comm_spawn ()" multiple
* times will create many sets of children with different
* communicators MPI_COMM_WORLD whereas "MPI_Comm_spawn_multiple ()"
* creates child processes with a single MPI_COMM_WORLD. Furthermore
* spawning several processes in one call may be faster than spawning
* them sequentially and perhaps even the communication between
* processes spawned at the same time may be faster than communication
* between sequentially spawned processes.
*
* For collective operations it is sometimes easier if all processes
* belong to the same intra-communicator. You can use the function
* "MPI_Intercomm_merge ()" to merge the local and remote group of
* an inter-communicator into an intra-communicator.
*
*
* Compiling:
* Store executable(s) into local directory.
* mpicc -o <program name> <source code file name>
*
* Store executable(s) into predefined directories.
* make
*
* Make program(s) automatically on all specified hosts. You must
* edit the file "make_compile" and specify your host names before
* you execute it.
* make_compile
*
* Running:
* LAM-MPI:
* mpiexec -boot -np <number of processes> <program name>
* or
* mpiexec -boot \
* -host <hostname> -np <number of processes> <program name> : \
* -host <hostname> -np <number of processes> <program name>
* or
* mpiexec -boot [-v] -configfile <application file>
* or
* lamboot [-v] [<host file>]
* mpiexec -np <number of processes> <program name>
* or
* mpiexec [-v] -configfile <application file>
* lamhalt
*
* OpenMPI:
* "host1", "host2", and so on can all have the same name,
* if you want to start a virtual computer with some virtual
* cpu's on the local host. The name "localhost" is allowed
* as well.
*
* mpiexec -np <number of processes> <program name>
* or
* mpiexec --host <host1,host2,...> \
* -np <number of processes> <program name>
* or
* mpiexec -hostfile <hostfile name> \
* -np <number of processes> <program name>
* or
* mpiexec -app <application file>
*
* Cleaning:
* local computer:
* rm <program name>
* or
* make clean_all
* on all specified computers (you must edit the file "make_clean_all"
* and specify your host names before you execute it).
* make_clean_all
*
*
* File: spawn_multiple_master.c Author: S. Gross
* Date: 28.09.2013
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define NUM_PROGS 2 /* # of programs */
#define NUM_SLAVES_1 1 /* # of slave processes, type 1 */
#define NUM_SLAVES_2 2 /* # of slave processes, type 2 */
#define SLAVE_PROG_1 "spawn_slave" /* slave program name, type 1 */
#define SLAVE_PROG_2 "spawn_slave" /* slave program name, type 2 */
int main (int argc, char *argv[])
{
  MPI_Comm COMM_CHILD_PROCESSES;        /* inter-communicator           */
  MPI_Info array_of_infos[NUM_PROGS];   /* startup hints for each cmd   */
  int      ntasks_world,                /* # of tasks in MPI_COMM_WORLD */
           ntasks_local,                /* COMM_CHILD_PROCESSES local   */
           ntasks_remote,               /* COMM_CHILD_PROCESSES remote  */
           mytid,                       /* my task id                   */
           namelen,                     /* length of processor name     */
           array_of_n_procs[NUM_PROGS], /* number of processes          */
           count_slaves,                /* total number of slaves       */
           i;                           /* loop variable                */
  char     processor_name[MPI_MAX_PROCESSOR_NAME],
           *array_of_commands[NUM_PROGS],
           **array_of_argvs[NUM_PROGS],
           *p_argv_1[] = {"program type 1", NULL},
           *p_argv_2[] = {"program type 2", "another parameter", NULL};

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  MPI_Comm_size (MPI_COMM_WORLD, &ntasks_world);
  /* check that only the master process is running in MPI_COMM_WORLD. */
  if (ntasks_world > 1)
  {
    if (mytid == 0)
    {
      fprintf (stderr, "\n\nError: Too many processes (only one "
               "process allowed).\n"
               "Usage:\n"
               "  mpiexec %s\n\n",
               argv[0]);
    }
    MPI_Finalize ();
    exit (EXIT_SUCCESS);
  }
  MPI_Get_processor_name (processor_name, &namelen);
  count_slaves = 0;
  for (i = 0; i < NUM_PROGS; ++i)
  {
    if ((i % 2) == 0)
    {
      array_of_commands[i] = SLAVE_PROG_1;
      array_of_argvs[i]    = p_argv_1;
      array_of_n_procs[i]  = NUM_SLAVES_1;
      array_of_infos[i]    = MPI_INFO_NULL;
      count_slaves        += NUM_SLAVES_1;
    }
    else
    {
      array_of_commands[i] = SLAVE_PROG_2;
      array_of_argvs[i]    = p_argv_2;
      array_of_n_procs[i]  = NUM_SLAVES_2;
      array_of_infos[i]    = MPI_INFO_NULL;
      count_slaves        += NUM_SLAVES_2;
    }
  }
  printf ("\nParent process %d running on %s\n"
          "  I create %d slave processes.\n\n",
          mytid, processor_name, count_slaves);
  MPI_Comm_spawn_multiple (NUM_PROGS, array_of_commands,
                           array_of_argvs, array_of_n_procs,
                           array_of_infos, 0, MPI_COMM_WORLD,
                           &COMM_CHILD_PROCESSES, MPI_ERRCODES_IGNORE);
  MPI_Comm_size (COMM_CHILD_PROCESSES, &ntasks_local);
  MPI_Comm_remote_size (COMM_CHILD_PROCESSES, &ntasks_remote);
  printf ("Parent process %d: tasks in MPI_COMM_WORLD: %d\n"
          "  tasks in COMM_CHILD_PROCESSES local group:  %d\n"
          "  tasks in COMM_CHILD_PROCESSES remote group: %d\n\n",
          mytid, ntasks_world, ntasks_local, ntasks_remote);
  MPI_Comm_free (&COMM_CHILD_PROCESSES);
  MPI_Finalize ();
  return EXIT_SUCCESS;
}
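
For reference, a minimal slave program consistent with the comment block above
might look like the following sketch. It is illustrative only: the actual
spawn_slave.c used in these tests is not part of this message, so the names and
output below are hypothetical. The child retrieves the inter-communicator to
its parent with MPI_Comm_get_parent(), as described in the comments, and could
additionally merge it into an intra-communicator with MPI_Intercomm_merge().

/* Illustrative sketch of a slave program (not the original spawn_slave.c).
 * Each child calls MPI_Init(), shares one MPI_COMM_WORLD with all children
 * created by the same MPI_Comm_spawn_multiple() call, and obtains the
 * inter-communicator to the parent via MPI_Comm_get_parent().
 */
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

int main (int argc, char *argv[])
{
  MPI_Comm COMM_PARENT;                 /* inter-communicator to parent */
  int      mytid,                       /* my task id                   */
           ntasks,                      /* # of tasks in MPI_COMM_WORLD */
           i;                           /* loop variable                */

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
  MPI_Comm_get_parent (&COMM_PARENT);
  if (COMM_PARENT == MPI_COMM_NULL)
  {
    /* The process was started directly with mpiexec, not spawned. */
    fprintf (stderr, "Process %d has no parent communicator.\n", mytid);
  }
  else
  {
    /* p_argv_1 / p_argv_2 from the master show up here as argv[1..]. */
    printf ("Slave process %d of %d received %d parameter(s).\n",
            mytid, ntasks, argc - 1);
    for (i = 1; i < argc; ++i)
    {
      printf ("  argv[%d]: %s\n", i, argv[i]);
    }
    /* If needed, MPI_Intercomm_merge (COMM_PARENT, 1, &intra_comm) would
     * merge the parent and child groups into one intra-communicator.
     */
  }
  MPI_Finalize ();
  return EXIT_SUCCESS;
}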