Environment: Open MPI 1.2 (yes, I know it is old), Python 2.6.1, BLCR 0.8.5


After performing cr_checkpoint --save-all, I can restart the job manually with 
cr_restart (BLCR) on a node. But when I go through my OpenLava scheduler, 
cr_restart launches mpirun and then nothing happens: neither orted nor the 
Python processes that were running come back. The new scheduler job performing 
the restart puts the old machinefile and the stderr and stdout files back in 
place. Here is what I see in an strace of mpirun (shown below):



What problem is this pointing at?

Thanks,



-Henk



poll([{fd=5, events=POLLIN}, {fd=4, events=POLLIN}, {fd=6, events=POLLIN}, 
{fd=11, events=POLLIN}, {fd=7, events=POLLIN}, {fd=8, events=POLLIN}, {fd=9, 
events=POLLIN}, {fd=10, events=POLLIN}], 8, 1000) = 8 ([{fd=5, 
revents=POLLNVAL}, {fd=4, revents=POLLNVAL}, {fd=6, revents=POLLNVAL}, {fd=11, 
revents=POLLNVAL}, {fd=7, revents=POLLNVAL}, {fd=8, revents=POLLNVAL}, {fd=9, 
revents=POLLNVAL}, {fd=10, revents=POLLNVAL}])
rt_sigprocmask(SIG_BLOCK, [INT USR1 USR2 TERM CHLD], NULL, 8) = 0
rt_sigaction(SIGCHLD, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGTERM, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGINT, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGUSR1, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGUSR2, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
sched_yield()                           = 0
rt_sigprocmask(SIG_BLOCK, [INT USR1 USR2 TERM CHLD], NULL, 8) = 0
rt_sigaction(SIGCHLD, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGTERM, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGINT, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGUSR1, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0
rt_sigaction(SIGUSR2, {0x2b7ca19cb30a, [INT USR1 USR2 TERM CHLD], 
SA_RESTORER|SA_RESTART, 0x397840f790}, NULL, 8) = 0




Reply via email to