Hi,

I have built openmpi-v1.10.3rc4 on my machines (Solaris 10 Sparc,
Solaris 10 x86_64, and openSUSE Linux 12.1 x86_64) with gcc-5.1.0
and Sun C 5.13. Unfortunately, I once more have a problem with
"--slot-list": this time a small program breaks on my Sparc machine,
while it works as expected on Linux and SunOS x86_64.


tyr hello_1 104 ompi_info | grep -e "OPAL repo revision:" -e "C compiler absolute:"
      OPAL repo revision: v1.10.2-244-g34e1f22
     C compiler absolute: /usr/local/gcc-5.1.0/bin/gcc

tyr hello_1 105 mpiexec -np 3 --host tyr --slot-list 0:0-1,1:0-1 hello_1_mpi
[tyr.informatik.hs-fulda.de:23717] [[64131,0],0] ORTE_ERROR_LOG: Not found in file ../../../../../openmpi-1.10.3rc4/orte/mca/rmaps/rank_file/rmaps_rank_file.c at line 326
[tyr.informatik.hs-fulda.de:23717] [[64131,0],0] ORTE_ERROR_LOG: Not found in file ../../../../openmpi-1.10.3rc4/orte/mca/rmaps/base/rmaps_base_map_job.c at line 377
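
Just for comparison: the error above is reported by the rank_file
mapper, so I would expect that the same placement can also be
expressed with an explicit rankfile (this is only my reading of the
rankfile syntax in mpirun(1); the file name "my_rankfile" and the
per-rank entries below are an illustration that I have not verified
on this machine):

  rank 0=tyr slot=0:0-1,1:0-1
  rank 1=tyr slot=0:0-1,1:0-1
  rank 2=tyr slot=0:0-1,1:0-1

  mpiexec -np 3 --host tyr -rf my_rankfile hello_1_mpi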

tyr hello_1 106 /usr/local/gdb-7.6.1_64_gcc/bin/gdb mpiexec
GNU gdb (GDB) 7.6.1
...
Reading symbols from /export2/prog/SunOS_sparc/openmpi-1.10.3_64_gcc/bin/orterun...done.
(gdb) set args -np 3 --host tyr --slot-list 0:0-1,1:0-1 hello_1_mpi
(gdb) r
Starting program: /usr/local/openmpi-1.10.3_64_gcc/bin/mpiexec -np 3 --host tyr --slot-list 0:0-1,1:0-1 hello_1_mpi
[Thread debugging using libthread_db enabled]
[New Thread 1 (LWP 1)]
[New LWP    2        ]
[tyr.informatik.hs-fulda.de:23719] [[64129,0],0] ORTE_ERROR_LOG: Not found in file ../../../../../openmpi-1.10.3rc4/orte/mca/rmaps/rank_file/rmaps_rank_file.c at line 326
[tyr.informatik.hs-fulda.de:23719] [[64129,0],0] ORTE_ERROR_LOG: Not found in file ../../../../openmpi-1.10.3rc4/orte/mca/rmaps/base/rmaps_base_map_job.c at line 377
[LWP    2         exited]
[New Thread 2        ]
[Switching to Thread 1 (LWP 1)]
sol_thread_fetch_registers: td_ta_map_id2thr: no thread can be found to satisfy query
(gdb) bt
#0  0xffffffff7f6173d0 in rtld_db_dlactivity () from /usr/lib/sparcv9/ld.so.1
#1  0xffffffff7f6175a8 in rd_event () from /usr/lib/sparcv9/ld.so.1
#2  0xffffffff7f618950 in lm_delete () from /usr/lib/sparcv9/ld.so.1
#3  0xffffffff7f6226bc in remove_so () from /usr/lib/sparcv9/ld.so.1
#4  0xffffffff7f624574 in remove_hdl () from /usr/lib/sparcv9/ld.so.1
#5  0xffffffff7f61d97c in dlclose_core () from /usr/lib/sparcv9/ld.so.1
#6  0xffffffff7f61d9d4 in dlclose_intn () from /usr/lib/sparcv9/ld.so.1
#7  0xffffffff7f61db0c in dlclose () from /usr/lib/sparcv9/ld.so.1
#8  0xffffffff7eccbd4c in dlopen_close (handle=0x100181820)
    at ../../../../../openmpi-1.10.3rc4/opal/mca/dl/dlopen/dl_dlopen_module.c:148
#9  0xffffffff7eccb480 in opal_dl_close (handle=0x100181820)
    at ../../../../openmpi-1.10.3rc4/opal/mca/dl/base/dl_base_fns.c:53
#10 0xffffffff7ec9094c in ri_destructor (obj=0x100183c40)
    at ../../../../openmpi-1.10.3rc4/opal/mca/base/mca_base_component_repository.c:357
#11 0xffffffff7ec8f448 in opal_obj_run_destructors (object=0x100183c40)
    at ../../../../openmpi-1.10.3rc4/opal/class/opal_object.h:451
#12 0xffffffff7ec9023c in mca_base_component_repository_release (
    component=0xffffffff7b824938 <mca_oob_tcp_component>)
    at ../../../../openmpi-1.10.3rc4/opal/mca/base/mca_base_component_repository.c:223
#13 0xffffffff7ec923dc in mca_base_component_unload (
    component=0xffffffff7b824938 <mca_oob_tcp_component>, output_id=-1)
    at ../../../../openmpi-1.10.3rc4/opal/mca/base/mca_base_components_close.c:47
#14 0xffffffff7ec92474 in mca_base_component_close (
    component=0xffffffff7b824938 <mca_oob_tcp_component>, output_id=-1)
    at ../../../../openmpi-1.10.3rc4/opal/mca/base/mca_base_components_close.c:60
#15 0xffffffff7ec92544 in mca_base_components_close (output_id=-1,
    components=0xffffffff7f12c6e0 <orte_oob_base_framework+80>, skip=0x0)
    at ../../../../openmpi-1.10.3rc4/opal/mca/base/mca_base_components_close.c:86
#16 0xffffffff7ec924b4 in mca_base_framework_components_close (
    framework=0xffffffff7f12c690 <orte_oob_base_framework>, skip=0x0)
    at ../../../../openmpi-1.10.3rc4/opal/mca/base/mca_base_components_close.c:66
#17 0xffffffff7efadd34 in orte_oob_base_close ()
    at ../../../../openmpi-1.10.3rc4/orte/mca/oob/base/oob_base_frame.c:94
#18 0xffffffff7eca91e8 in mca_base_framework_close (
    framework=0xffffffff7f12c690 <orte_oob_base_framework>)
    at ../../../../openmpi-1.10.3rc4/opal/mca/base/mca_base_framework.c:198
#19 0xffffffff7c807c68 in rte_finalize ()
    at ../../../../../openmpi-1.10.3rc4/orte/mca/ess/hnp/ess_hnp_module.c:882
#20 0xffffffff7ef31180 in orte_finalize ()
    at ../../openmpi-1.10.3rc4/orte/runtime/orte_finalize.c:65
#21 0x00000001000074d4 in orterun (argc=8, argv=0xffffffff7fffdf78)
    at ../../../../openmpi-1.10.3rc4/orte/tools/orterun/orterun.c:1151
#22 0x0000000100003e78 in main (argc=8, argv=0xffffffff7fffdf78)
    at ../../../../openmpi-1.10.3rc4/orte/tools/orterun/main.c:13
(gdb) q
A debugging session is active.

        Inferior 1 [process 23719    ] will be killed.

Quit anyway? (y or n) y
Quitting: sol_thread_fetch_registers: td_ta_map_id2thr: no thread can be found to satisfy query
tyr hello_1 107



I would be grateful if somebody could fix the problem. Please let me
know if you need more information. Thank you very much in advance for
any help.


Kind regards

Siegmar
/* An MPI-version of the "hello world" program, which delivers some
 * information about its machine and operating system.
 *
 *
 * Compiling:
 *   Store executable(s) into local directory.
 *     mpicc -o <program name> <source code file name>
 *
 *   Store executable(s) into predefined directories.
 *     make
 *
 *   Make program(s) automatically on all specified hosts. You must
 *   edit the file "make_compile" and specify your host names before
 *   you execute it.
 *     make_compile
 *
 * Running:
 *   LAM-MPI:
 *     mpiexec -boot -np <number of processes> <program name>
 *     or
 *     mpiexec -boot \
 *	 -host <hostname> -np <number of processes> <program name> : \
 *	 -host <hostname> -np <number of processes> <program name>
 *     or
 *     mpiexec -boot [-v] -configfile <application file>
 *     or
 *     lamboot [-v] [<host file>]
 *       mpiexec -np <number of processes> <program name>
 *	 or
 *	 mpiexec [-v] -configfile <application file>
 *     lamhalt
 *
 *   OpenMPI:
 *     "host1", "host2", and so on can all have the same name,
 *     if you want to start a virtual computer with some virtual
 *     cpu's on the local host. The name "localhost" is allowed
 *     as well.
 *
 *     mpiexec -np <number of processes> <program name>
 *     or
 *     mpiexec --host <host1,host2,...> \
 *	 -np <number of processes> <program name>
 *     or
 *     mpiexec -hostfile <hostfile name> \
 *	 -np <number of processes> <program name>
 *     or
 *     mpiexec -app <application file>
 *
 * Cleaning:
 *   local computer:
 *     rm <program name>
 *     or
 *     make clean_all
 *   on all specified computers (you must edit the file "make_clean_all"
 *   and specify your host names before you execute it).
 *     make_clean_all
 *
 *
 * File: hello_1_mpi.c		       	Author: S. Gross
 * Date: 01.10.2012
 *
 */
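
/* Concrete example (the exact commands from the report above; the
 * host name "tyr" and the slot list are specific to that machine):
 *
 *   mpicc -o hello_1_mpi hello_1_mpi.c
 *   mpiexec -np 3 --host tyr --slot-list 0:0-1,1:0-1 hello_1_mpi
 */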

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/utsname.h>
#include "mpi.h"

#define	BUF_SIZE	255		/* message buffer size		*/
#define	MAX_TASKS	12		/* max. number of tasks		*/
#define	SENDTAG		1		/* send message command		*/
#define	EXITTAG		2		/* termination command		*/
#define	MSGTAG		3		/* normal message token		*/

#define ENTASKS		-1		/* error: too many tasks	*/

static void master (void);
static void slave (void);

int main (int argc, char *argv[])
{
  int  mytid,				/* my task id			*/
       ntasks,				/* number of parallel tasks	*/
       namelen;				/* length of processor name	*/
  char processor_name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
  MPI_Get_processor_name (processor_name, &namelen);
  /* With the next statement every process executing this code will
   * print one line on the display. It may happen that the lines get
   * mixed up, because the display is a critical section. In general
   * only one process (usually the process with rank 0) prints to the
   * display and all other processes send their messages to this
   * process. Nevertheless, for debugging purposes (or to demonstrate
   * that it is possible) it may be useful if every process prints
   * its own message.
   */
  fprintf (stdout, "Process %d of %d running on %s\n",
	   mytid, ntasks, processor_name);
  fflush (stdout);
  MPI_Barrier (MPI_COMM_WORLD);		/* wait for all other processes	*/

  if (mytid == 0)
  {
    master ();
  }
  else
  {
    slave ();
  }
  MPI_Finalize ();
  return EXIT_SUCCESS;
}


/* Function for the "master task". The master sends a request to all
 * slaves asking for a message. After receiving and printing the
 * messages, it sends all slaves a termination command.
 *
 * input parameters:	not necessary
 * output parameters:	not available
 * return value:	nothing
 * side effects:	no side effects
 *
 */
void master (void)
{
  int		ntasks,			/* number of parallel tasks	*/
		mytid,			/* my task id			*/
		num,			/* number of entries		*/
		i;			/* loop variable		*/
  char		buf[BUF_SIZE + 1];    	/* message buffer (+1 for '\0')	*/
  MPI_Status	stat;			/* message details		*/

  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
  if (ntasks > MAX_TASKS)
  {
    fprintf (stderr, "Error: Too many tasks. Try again with at most "
	     "%d tasks.\n", MAX_TASKS);
    /* terminate all slave tasks					*/
    for (i = 1; i < ntasks; ++i)
    {
      MPI_Send ((char *) NULL, 0, MPI_CHAR, i, EXITTAG, MPI_COMM_WORLD);
    }
    MPI_Finalize ();
    exit (ENTASKS);
  }
  printf ("\n\nNow %d slave tasks are sending greetings.\n\n",
	  ntasks - 1);
  /* request messages from slave tasks					*/
  for (i = 1; i < ntasks; ++i)
  {
    MPI_Send ((char *) NULL, 0, MPI_CHAR, i, SENDTAG, MPI_COMM_WORLD);
  }
  /* wait for messages and print greetings     				*/
  for (i = 1; i < ntasks; ++i)
  {
    MPI_Recv (buf, BUF_SIZE, MPI_CHAR, MPI_ANY_SOURCE,
	      MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
    MPI_Get_count (&stat, MPI_CHAR, &num);
    buf[num] = '\0';			/* add missing end-of-string	*/
    printf ("Greetings from task %d:\n"
	    "  message type:        %d\n"
	    "  msg length:          %d characters\n"
	    "  message:             %s\n\n",
	    stat.MPI_SOURCE, stat.MPI_TAG, num, buf);
  }
  /* terminate all slave tasks						*/
  for (i = 1; i < ntasks; ++i)
  {
    MPI_Send ((char *) NULL, 0, MPI_CHAR, i, EXITTAG, MPI_COMM_WORLD);
  }
}


/* Function for "slave tasks". The slave task sends its hostname,
 * operating system name and release, and processor architecture
 * as a message to the master.
 *
 * input parameters:	not necessary
 * output parameters:	not available
 * return value:	nothing
 * side effects:	no side effects
 *
 */
void slave (void)
{
  struct utsname sys_info;		/* system information		*/
  int		 mytid,		       	/* my task id			*/
		 more_to_do;
  char		 buf[BUF_SIZE];       	/* message buffer      		*/
  MPI_Status	 stat;			/* message details		*/

  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  more_to_do = 1;
  while (more_to_do == 1)
  {
    /* wait for a message from the master task				*/
    MPI_Recv (buf, BUF_SIZE, MPI_CHAR, 0, MPI_ANY_TAG,
	      MPI_COMM_WORLD, &stat);
    if (stat.MPI_TAG != EXITTAG)
    {
      uname (&sys_info);
      strcpy (buf, "\n    hostname:          ");
      strncpy (buf + strlen (buf), sys_info.nodename,
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), "\n    operating system:  ",
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), sys_info.sysname,
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), "\n    release:           ",
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), sys_info.release,
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), "\n    processor:         ",
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), sys_info.machine,
	       BUF_SIZE - strlen (buf));
      MPI_Send (buf, strlen (buf), MPI_CHAR, stat.MPI_SOURCE,
		MSGTAG, MPI_COMM_WORLD);
    }
    else
    {
      more_to_do = 0;			/* terminate			*/
    }
  }
}
