On 05.07.2012 at 00:38, Dominik Goeddeke wrote:

> no idea of Rocks, but with PBS and SLURM, I always do this directly in the
> job submission script. Below is an example of an admittedly spaghetti-code
> script that does this -- assuming proper (un)commenting -- for PBS and
> SLURM

For SLURM, Torque and GridEngine I would suggest using the built-in support in
Open MPI and MPICH2 directly, so there is no need to build a machinefile from
the list of nodes you were allocated. In addition, it provides tight
integration of the parallel job with the queuing system, and jobs can be
removed cleanly.
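
A minimal sketch of what this looks like under SLURM (using plain #SBATCH
directives rather than the "# @" wrapper syntax in the script below, and
reusing the module and file names from Dominik's script purely for
illustration; it assumes Open MPI was built with SLURM support):

#!/bin/bash
#SBATCH --job-name=feast
#SBATCH --ntasks=7
#SBATCH --time=2:00:00

module purge
module load gcc/4.6.2
module load openmpi/1.5.4

cd $HOME/feast/feast/feast/applications/poisson_coproc

# Open MPI reads the SLURM allocation itself: no -np, no --hostfile,
# one process per allocated slot, started under SLURM's control.
mpirun ./feastgpu-ompi master.dat.strongscaling.m6.L8.np007.dat

The same holds for Torque/PBS: an mpiexec from MPICH2 (Hydra) or an Open MPI
mpirun built with TM support picks the node list up from the batch system, so
$PBS_NODEFILE never needs to be parsed by hand.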

Is there any reason to bypass this mechanism?

-- Reuti


>  and OpenMPI and MPICH2, for one particular machine that I have been toying 
> around with lately ... 
> 
> Dominik
> 
> #!/bin/bash
> 
> #################### PBS
> #PBS -N feast
> #PBS -l nodes=25:ppn=2
> #PBS -q batch
> #PBS -l walltime=2:00:00
> #job should not rerun if it fails
> #PBS -r n
> 
> ####### SLURM
> # @ job_name = feaststrong1
> # @ initialdir = .
> # @ output = feaststrong1_%j.out
> # @ error = feaststrong1_%j.err
> # @ total_tasks = 50
> # @ cpus_per_task = 1
> # @ wall_clock_limit = 2:00:00
> 
> # modules
> module purge
> module load gcc/4.6.2
> module load openmpi/1.5.4
> #module load mpich2/1.4.1
> 
> # cd into wdir
> cd $HOME/feast/feast/feast/applications/poisson_coproc
> 
> 
> ##### PBS with MPICH2
> # create machine files to isolate the master process
> #cat $PBS_NODEFILE > nodes.txt
> ## extract slaves
> #sort -u  nodes.txt > temp.txt
> #lines=`wc -l temp.txt | awk '{print $1}'`
> #((lines=$lines - 1))
> #tail -n $lines temp.txt > slavetemp.txt
> #cat slavetemp.txt | awk '{print $0 ":2"}' > slaves.txt
> ## extract master
> #head -n 1 temp.txt > mastertemp.txt
> #cat mastertemp.txt | awk '{print $0 ":1"}' > master.txt
> ## merge into one dual nodefile
> #cat master.txt > dual.hostfile
> #cat slaves.txt >> dual.hostfile 
> ## same for single hostfile
> #tail -n $lines temp.txt > slavetemp.txt
> #cat slavetemp.txt | awk '{print $0 ":1"}' > slaves.txt
> ## extract master
> #head -n 1 temp.txt > mastertemp.txt
> #cat mastertemp.txt | awk '{print $0 ":1"}' > master.txt
> ## merge into one single nodefile
> #cat master.txt > single.hostfile
> #cat slaves.txt >> single.hostfile
> ## and clean up
> #rm -f slavetemp.txt mastertemp.txt master.txt slaves.txt temp.txt nodes.txt
> 
> # 4 nodes
> #mpiexec -n 7 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np007.dat
> #mkdir arm-strongscaling-series1-L8-nodes04
> #mv feastlog.* arm-strongscaling-series1-L8-nodes04
> 
> # 7 nodes
> #mpiexec -n 13 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np013.dat
> #mkdir arm-strongscaling-series1-L8-nodes07
> #mv feastlog.* arm-strongscaling-series1-L8-nodes07
> 
> # 13 nodes
> #mpiexec -n 25 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np025.dat
> #mkdir arm-strongscaling-series1-L8-nodes13
> #mv feastlog.* arm-strongscaling-series1-L8-nodes13
> 
> # 25 nodes
> #mpiexec -n 49 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np049.dat
> #mkdir arm-strongscaling-series1-L8-nodes25
> #mv feastlog.* arm-strongscaling-series1-L8-nodes25
> 
> 
> ############## SLURM
> 
> # figure out which nodes we got
> srun /bin/hostname | sort > availhosts3.txt
> 
> lines=`wc -l availhosts3.txt | awk '{print $1}'`
> ((lines=$lines - 2))
> tail -n $lines availhosts3.txt > slaves3.txt
> head -n 1 availhosts3.txt > master3.txt
> cat master3.txt > hostfile3.txt
> cat slaves3.txt >> hostfile3.txt
> # DGDG: SLURM -m arbitrary not supported by OpenMPI
> #export SLURM_HOSTFILE=./hostfile3.txt
> 
> 
> # 4 nodes
> #mpirun -np 7 --hostfile hostfile3.txt ./trace.sh ./feastgpu-ompi master.dat.strongscaling.m6.L8.np007.dat
> mpirun -np 7 --hostfile hostfile3.txt ./feastgpu-ompi master.dat.strongscaling.m6.L8.np007.dat
> #mpiexec -n 7 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np007.dat
> #srun -n 7 -m arbitrary ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np007.dat
> mkdir arm-strongscaling-series1-L8-nodes04
> mv feastlog.* arm-strongscaling-series1-L8-nodes04
> 
> # 7 nodes
> #mpirun -np 13 --hostfile hostfile3.txt ./trace.sh ./feastgpu-ompi master.dat.strongscaling.m6.L8.np013.dat
> mpirun -np 13 --hostfile hostfile3.txt ./feastgpu-ompi master.dat.strongscaling.m6.L8.np013.dat
> #mpiexec -n 13 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np013.dat
> #srun -n 13 -m arbitrary ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np013.dat
> mkdir arm-strongscaling-series1-L8-nodes07
> mv feastlog.* arm-strongscaling-series1-L8-nodes07
> 
> # 13 nodes
> #mpirun -np 25 --hostfile hostfile3.txt ./trace.sh ./feastgpu-ompi master.dat.strongscaling.m6.L8.np025.dat
> mpirun -np 25 --hostfile hostfile3.txt ./feastgpu-ompi master.dat.strongscaling.m6.L8.np025.dat
> #mpiexec -n 25 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np025.dat
> #srun -n 25 -m arbitrary ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np025.dat
> mkdir arm-strongscaling-series1-L8-nodes13
> mv feastlog.* arm-strongscaling-series1-L8-nodes13
> 
> # 25 nodes
> #mpirun -np 49 --hostfile hostfile3.txt ./trace.sh ./feastgpu-ompi master.dat.strongscaling.m6.L8.np049.dat
> mpirun -np 49 --hostfile hostfile3.txt ./feastgpu-ompi master.dat.strongscaling.m6.L8.np049.dat
> #mpiexec -n 49 -f dual.hostfile ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np049.dat
> #srun -n 49 -m arbitrary ./feastgpu-mpich2 master.dat.strongscaling.m6.L8.np049.dat
> mkdir arm-strongscaling-series1-L8-nodes25
> mv feastlog.* arm-strongscaling-series1-L8-nodes25
> 
> 
> 
> 
> 
> 
> On 07/05/2012 12:10 AM, Hodgess, Erin wrote:
>> Dear MPI people:
>> 
>> Is there a way (a script) available to automatically generate a machinefile, 
>> please?
>> 
>> This would be on Rocks.
>> 
>>  ompi_info -v ompi full --parsable
>> package:Open MPI r...@vi-1.rocksclusters.org Distribution
>> ompi:version:full:1.3.2
>> ompi:version:svn:r21054
>> ompi:version:release_date:Apr 21, 2009
>> orte:version:full:1.3.2
>> orte:version:svn:r21054
>> orte:version:release_date:Apr 21, 2009
>> opal:version:full:1.3.2
>> opal:version:svn:r21054
>> opal:version:release_date:Apr 21, 2009
>> ident:1.3.2
>> 
>> Thanks,
>> Erin
>> 
>> 
>> 
>> Erin M. Hodgess, PhD
>> Associate Professor
>> Department of Computer and Mathematical Sciences
>> University of Houston - Downtown
>> mailto: hodge...@uhd.edu
>> 
>> 
>> 
>> 
> 
> 
> -- 
> Jun.-Prof. Dr. Dominik Göddeke
> Hardware-orientierte Numerik für große Systeme
> Institut für Angewandte Mathematik (LS III)
> Fakultät für Mathematik, Technische Universität Dortmund
> 
> http://www.mathematik.tu-dortmund.de/~goeddeke
> 
> Tel. +49-(0)231-755-7218  Fax +49-(0)231-755-5933
> 
> 
> 
> 
> 

