Hi there,
I'm running into an issue where mpirun isn't terminating when my executable has a nonzero exit status - instead it's hanging indefinitely. I'm attaching my process tree, the error message from the application, and the messages printed to stderr. Please let me know what I can do.

Thanks,
- Lee-Ping

=== Process Tree ===
leeping@vsp-compute-13:~$ ps xjf
PPID PID PGID SID TTY TPGID STAT UID TIME COMMAND
31969 31977 31969 31969 ? -1 S 48618 0:00 sshd: leeping@pts/1
31977 31978 31978 31978 pts/1 32038 Ss 48618 0:00 \_ -bash
31978 32038 32038 31978 pts/1 32038 R+ 48618 0:00 \_ ps xjf
23667 29307 29307 29307 ? -1 Ss 48618 0:00 /bin/bash /home/leeping/temp/leeping-workers/10276/worker1.sh
29307 29308 29307 29307 ? -1 S 48618 0:00 \_ /bin/bash /home/leeping/temp/leeping-workers/10276/worker2.sh
29308 29425 29307 29307 ? -1 S 48618 0:00 \_ ./work_queue_worker -d all --cores 6 -t 86400s localhost 9876
29425 26245 26245 29307 ? -1 S 48618 0:00 | \_ sh -c optimize-geometry.py initial.xyz --method b3lyp --basis "6-31g(d)" --charge 0 --mult 1 &> optimize.log
26245 26246 26245 29307 ? -1 Sl 48618 0:01 | \_ /home/leeping/local/bin/python /home/leeping/local/bin/optimize-geometry.py initial.xyz --method b3lyp --basis 6-31g(d) --charge 0 --mult 1
26246 27834 26245 29307 ? -1 S 48618 0:00 | \_ /bin/sh -c qchem42 -np 6 -save optimize.in optimize.out optimize.d 2> optimize.err
27834 27835 26245 29307 ? -1 S 48618 0:00 | \_ /bin/bash /home/leeping/opt/bin/qchem42 -np 6 -save optimize.in optimize.out optimize.d
27835 27897 26245 29307 ? -1 S 48618 0:00 | \_ /bin/csh -f /opt/scratch/leeping/opt/qchem-4.2/bin/qchem -np 6 -nt 1 -save optimize.in optimize.out optimize.d
27897 27921 26245 29307 ? -1 S 48618 0:00 | \_ /bin/csh -f /opt/scratch/leeping/opt/qchem-4.2/bin/parallel.csh optimize.in 6 0 ./optimize.d/ 27897
27921 27926 26245 29307 ?
-1 Sl 48618 0:00 | \_ /opt/scratch/leeping/opt/qchem-4.2/ext-libs/openmpi/bin/mpirun -np 6 /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe .optimize.in.27897.qcin.1 ./optimize.d/ === Application Error Message === 100 -843.2762335150 5.69E-08 00000 Convergence failure Q-Chem fatal error occurred in module /home/leeping/src/qchem/scfman/scfman.C, line 4377: SCF failed to converge Sat Sep 20 23:57:37 2014 === Standard error === leeping@vsp-compute-13:/opt/scratch/leeping/worker-48618-29425/t.62$ cat optimize.err [vsp-compute-13:27929] *** Process received signal *** [vsp-compute-13:27929] Signal: Aborted (6) [vsp-compute-13:27929] Signal code: (-6) [vsp-compute-13:27932] *** Process received signal *** [vsp-compute-13:27932] Signal: Aborted (6) [vsp-compute-13:27932] Signal code: (-6) [vsp-compute-13:27934] *** Process received signal *** [vsp-compute-13:27934] Signal: Aborted (6) [vsp-compute-13:27934] Signal code: (-6) [vsp-compute-13:27928] *** Process received signal *** [vsp-compute-13:27928] Signal: Aborted (6) [vsp-compute-13:27928] Signal code: (-6) [vsp-compute-13:27936] *** Process received signal *** [vsp-compute-13:27936] Signal: Aborted (6) [vsp-compute-13:27936] Signal code: (-6) [vsp-compute-13:27930] *** Process received signal *** [vsp-compute-13:27930] Signal: Aborted (6) [vsp-compute-13:27930] Signal code: (-6) [vsp-compute-13:27932] [ 0] /lib64/libpthread.so.0[0x3464c0eb70] [vsp-compute-13:27932] [ 1] [vsp-compute-13:27928] [ 0] /lib64/libpthread.so.0[0x3464c0eb70] [vsp-compute-13:27928] [ 1] /lib64/libc.so.6(gsignal+0x35)[0x3464430265] [vsp-compute-13:27928] [ 2] [vsp-compute-13:27934] [ 0] /lib64/libpthread.so.0[0x3464c0eb70] [vsp-compute-13:27934] [ 1] /lib64/libc.so.6(gsignal+0x35)[0x3464430265] [vsp-compute-13:27934] [ 2] [vsp-compute-13:27929] [ 0] /lib64/libpthread.so.0[0x3464c0eb70] [vsp-compute-13:27929] [ 1] /lib64/libc.so.6(gsignal+0x35)[0x3464430265] [vsp-compute-13:27929] [ 2] /lib64/libc.so.6(gsignal+0x35)[0x3464430265] 
[vsp-compute-13:27932] [ 2] /lib64/libc.so.6(abort+0x110)[0x3464431d10] [vsp-compute-13:27932] [ 3] /lib64/libc.so.6(abort+0x110)[0x3464431d10] [vsp-compute-13:27928] [ 3] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0xc304ca6] [vsp-compute-13:27928] [ 4] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x41a0cf5] [vsp-compute-13:27928] [ 5] /lib64/libc.so.6(abort+0x110)[0x3464431d10] [vsp-compute-13:27934] [ 3] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0xc304ca6] [vsp-compute-13:27934] [ 4] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x41a0cf5] [vsp-compute-13:27934] [ 5] /lib64/libc.so.6(abort+0x110)[0x3464431d10] [vsp-compute-13:27929] [ 3] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0xc304ca6] [vsp-compute-13:27929] [ 4] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x41a0cf5] [vsp-compute-13:27929] [ 5] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0xc304ca6] [vsp-compute-13:27932] [ 4] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x41a0cf5] [vsp-compute-13:27932] [ 5] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x414a06e] [vsp-compute-13:27934] [ 6] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x414a06e] [vsp-compute-13:27929] [ 6] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x463392] [vsp-compute-13:27929] [ 7] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x414a06e] [vsp-compute-13:27928] [ 6] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x463392] [vsp-compute-13:27928] [ 7] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x414a06e] [vsp-compute-13:27932] [ 6] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x463392] [vsp-compute-13:27932] [ 7] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x463392] [vsp-compute-13:27934] [ 7] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45cdb0] [vsp-compute-13:27934] [ 8] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45cdb0] [vsp-compute-13:27932] [ 8] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45cdb0] [vsp-compute-13:27929] [ 8] 
/opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45cdb0] [vsp-compute-13:27928] [ 8] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45bfb6] [vsp-compute-13:27928] [ 9] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45bfb6] [vsp-compute-13:27932] [ 9] /lib64/libc.so.6(__libc_start_main+0xf4)[0x346441d994] [vsp-compute-13:27932] [10] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45bfb6] [vsp-compute-13:27929] [ 9] /lib64/libc.so.6(__libc_start_main+0xf4)[0x346441d994] [vsp-compute-13:27929] [10] /lib64/libc.so.6(__libc_start_main+0xf4)[0x346441d994] [vsp-compute-13:27928] [10] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45bfb6] [vsp-compute-13:27934] [ 9] /lib64/libc.so.6(__libc_start_main+0xf4)[0x346441d994] [vsp-compute-13:27934] [10] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x43e529] [vsp-compute-13:27928] *** End of error message *** /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x43e529] [vsp-compute-13:27929] *** End of error message *** /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x43e529] [vsp-compute-13:27932] *** End of error message *** /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x43e529] [vsp-compute-13:27934] *** End of error message *** [vsp-compute-13:27936] [ 0] /lib64/libpthread.so.0[0x3464c0eb70] [vsp-compute-13:27936] [ 1] /lib64/libc.so.6(gsignal+0x35)[0x3464430265] [vsp-compute-13:27936] [ 2] /lib64/libc.so.6(abort+0x110)[0x3464431d10] [vsp-compute-13:27936] [ 3] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0xc304ca6] [vsp-compute-13:27936] [ 4] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x41a0cf5] [vsp-compute-13:27936] [ 5] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x414a06e] [vsp-compute-13:27936] [ 6] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x463392] [vsp-compute-13:27936] [ 7] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45cdb0] [vsp-compute-13:27936] [ 8] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45bfb6] [vsp-compute-13:27936] [ 9] 
/lib64/libc.so.6(__libc_start_main+0xf4)[0x346441d994] [vsp-compute-13:27936] [10] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x43e529] [vsp-compute-13:27936] *** End of error message *** [vsp-compute-13:27930] [ 0] /lib64/libpthread.so.0[0x3464c0eb70] [vsp-compute-13:27930] [ 1] /lib64/libc.so.6(gsignal+0x35)[0x3464430265] [vsp-compute-13:27930] [ 2] /lib64/libc.so.6(abort+0x110)[0x3464431d10] [vsp-compute-13:27930] [ 3] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0xc304ca6] [vsp-compute-13:27930] [ 4] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x41a0cf5] [vsp-compute-13:27930] [ 5] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x414a06e] [vsp-compute-13:27930] [ 6] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x463392] [vsp-compute-13:27930] [ 7] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45cdb0] [vsp-compute-13:27930] [ 8] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x45bfb6] [vsp-compute-13:27930] [ 9] /lib64/libc.so.6(__libc_start_main+0xf4)[0x346441d994] [vsp-compute-13:27930] [10] /opt/scratch/leeping/opt/qchem-4.2/exe/qcprog.exe[0x43e529] [vsp-compute-13:27930] *** End of error message ***