Hi,

I've installed (or at least tried to install) openmpi-master-201901030305-ee26ed9
on my "SUSE Linux Enterprise Server 12.3 (x86_64)" with gcc-7.3.0,
icc-19.0.1.144, pgcc-18.4-0, and Sun C 5.15 (Oracle Developer Studio 12.6).
Unfortunately, I still cannot build it with Sun C, and with the other
compilers I get a segmentation fault when I run one of my small programs.

With Sun C I still get the error that I reported some time ago:
https://www.mail-archive.com/users@lists.open-mpi.org/msg32816.html


The program runs as expected if I only use my local machine "loki", but it
breaks as soon as a remote machine is involved (even if I use only the
remote machine without "loki").
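
For reference, a local-only run of the form

  mpiexec -np 4 --host loki:4 hello_1_mpi

completes without problems, so I have omitted its output here.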

loki hello_1 114 ompi_info | grep -e "Open MPI repo revision" -e"Configure command line"
  Open MPI repo revision: v2.x-dev-6601-gee26ed9
Configure command line: '--prefix=/usr/local/openmpi-master_64_gcc' '--libdir=/usr/local/openmpi-master_64_gcc/lib64' '--with-jdk-bindir=/usr/local/jdk-11/bin' '--with-jdk-headers=/usr/local/jdk-11/include' 'JAVA_HOME=/usr/local/jdk-11' 'LDFLAGS=-m64 -L/usr/local/cuda/lib64' 'CC=gcc' 'CXX=g++' 'FC=gfortran' 'CFLAGS=-m64 -I/usr/local/cuda/include' 'CXXFLAGS=-m64 -I/usr/local/cuda/include' 'FCFLAGS=-m64' 'CPP=cpp -I/usr/local/cuda/include' 'CXXCPP=cpp -I/usr/local/cuda/include' '--enable-mpi-cxx' '--enable-cxx-exceptions' '--enable-mpi-java' '--with-cuda=/usr/local/cuda' '--with-valgrind=/usr/local/valgrind' '--with-hwloc=internal' '--without-verbs' '--with-wrapper-cflags=-std=c11 -m64' '--with-wrapper-cxxflags=-m64' '--with-wrapper-fcflags=-m64' '--enable-debug'


loki hello_1 115 mpiexec -np 4 --host loki:2,nfs2:2 hello_1_mpi
Process 0 of 4 running on loki
Process 1 of 4 running on loki
Process 2 of 4 running on nfs2
Process 3 of 4 running on nfs2

Now 3 slave tasks are sending greetings.

Greetings from task 1:
  message type:        3
  msg length:          132 characters
... (complete output of my program)

[nfs2:01336] *** Process received signal ***
[nfs2:01336] Signal: Segmentation fault (11)
[nfs2:01336] Signal code: Address not mapped (1)
[nfs2:01336] Failing at address: 0x7feea4849268
[nfs2:01336] [ 0] /lib64/libpthread.so.0(+0x10c10)[0x7feeacbbec10]
[nfs2:01336] [ 1] /usr/local/openmpi-master_64_gcc/lib64/libopen-pal.so.0(+0x7cd34)[0x7feeadd94d34]
[nfs2:01336] [ 2] /usr/local/openmpi-master_64_gcc/lib64/libopen-pal.so.0(+0x78673)[0x7feeadd90673]
[nfs2:01336] [ 3] /usr/local/openmpi-master_64_gcc/lib64/libopen-pal.so.0(+0x7ac2c)[0x7feeadd92c2c]
[nfs2:01336] [ 4] /usr/local/openmpi-master_64_gcc/lib64/libopen-pal.so.0(opal_finalize_cleanup_domain+0x3e)[0x7feeadd56507]
[nfs2:01336] [ 5] /usr/local/openmpi-master_64_gcc/lib64/libopen-pal.so.0(opal_finalize_util+0x56)[0x7feeadd56667]
[nfs2:01336] [ 6] /usr/local/openmpi-master_64_gcc/lib64/libopen-pal.so.0(opal_finalize+0xd3)[0x7feeadd567de]
[nfs2:01336] [ 7] /usr/local/openmpi-master_64_gcc/lib64/libopen-rte.so.0(orte_finalize+0x1ba)[0x7feeae09d7ea]
[nfs2:01336] [ 8] /usr/local/openmpi-master_64_gcc/lib64/libopen-rte.so.0(orte_daemon+0x3ddd)[0x7feeae0cf55d]
[nfs2:01336] [ 9] orted[0x40086d]
[nfs2:01336] [10] /lib64/libc.so.6(__libc_start_main+0xf5)[0x7feeac829725]
[nfs2:01336] [11] orted[0x400739]
[nfs2:01336] *** End of error message ***
Segmentation fault (core dumped)
loki hello_1 116
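
If it helps, I could also inspect the core file from "nfs2" with gdb, for
example with something like the following (I assume the crashing binary is
the "orted" from my installation prefix):

  gdb /usr/local/openmpi-master_64_gcc/bin/orted core
  (gdb) bt full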


I would be grateful if somebody could fix the problem. Do you need anything
else from me? Thank you very much in advance for any help.


Kind regards

Siegmar

/* An MPI-version of the "hello world" program, which delivers some
 * information about its machine and operating system.
 *
 *
 * Compiling:
 *   mpicc -o <program name> <source code file name> -lm
 *
 * Running:
 *   mpiexec -np <number of processes> <program name>
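 *
 *   e.g. (a concrete instance of the templates above; the file name
 *   matches the "File:" entry below):
 *     mpicc -o hello_1_mpi hello_1_mpi.c -lm
 *     mpiexec -np 4 hello_1_mpi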
 *
 *
 * File: hello_1_mpi.c		       	Author: S. Gross
 * Date: 04.08.2017
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/utsname.h>
#include "mpi.h"

#define	BUF_SIZE	255		/* message buffer size		*/
#define	MAX_TASKS	12		/* max. number of tasks		*/
#define	SENDTAG		1		/* send message command		*/
#define	EXITTAG		2		/* termination command		*/
#define	MSGTAG		3		/* normal message token		*/

#define ENTASKS		-1		/* error: too many tasks	*/

static void master (void);
static void slave (void);

int main (int argc, char *argv[])
{
  int  mytid,				/* my task id			*/
       ntasks,				/* number of parallel tasks	*/
       namelen;				/* length of processor name	*/
  char processor_name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
  MPI_Get_processor_name (processor_name, &namelen);
  /* With the next statement every process executing this code will
   * print one line on the display. It may happen that the lines will
   * get mixed up because the display is a critical section. In general
   * only one process (mostly the process with rank 0) will print on
   * the display and all other processes will send their messages to
   * this process. Nevertheless for debugging purposes (or to
   * demonstrate that it is possible) it may be useful if every
   * process prints itself.
   */
  fprintf (stdout, "Process %d of %d running on %s\n",
	   mytid, ntasks, processor_name);
  fflush (stdout);
  MPI_Barrier (MPI_COMM_WORLD);		/* wait for all other processes	*/

  if (mytid == 0)
  {
    master ();
  }
  else
  {
    slave ();
  }
  MPI_Finalize ();
  return EXIT_SUCCESS;
}


/* Function for the "master task". The master sends a request to all
 * slaves asking for a message. After receiving and printing the
 * messages he sends all slaves a termination command.
 *
 * input parameters:	not necessary
 * output parameters:	not available
 * return value:	nothing
 * side effects:	no side effects
 *
 */
void master (void)
{
  int		ntasks,			/* number of parallel tasks	*/
		mytid,			/* my task id			*/
		num,			/* number of entries		*/
		i;			/* loop variable		*/
  char		buf[BUF_SIZE + 1];    	/* message buffer (+1 for '\0')	*/
  MPI_Status	stat;			/* message details		*/

  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
  if (ntasks > MAX_TASKS)
  {
    fprintf (stderr, "Error: Too many tasks. Try again with at most "
	     "%d tasks.\n", MAX_TASKS);
    /* terminate all slave tasks					*/
    for (i = 1; i < ntasks; ++i)
    {
      MPI_Send ((char *) NULL, 0, MPI_CHAR, i, EXITTAG, MPI_COMM_WORLD);
    }
    MPI_Finalize ();
    exit (ENTASKS);
  }
  printf ("\n\nNow %d slave tasks are sending greetings.\n\n",
	  ntasks - 1);
  /* request messages from slave tasks					*/
  for (i = 1; i < ntasks; ++i)
  {
    MPI_Send ((char *) NULL, 0, MPI_CHAR, i, SENDTAG, MPI_COMM_WORLD);
  }
  /* wait for messages and print greetings     				*/
  for (i = 1; i < ntasks; ++i)
  {
    MPI_Recv (buf, BUF_SIZE, MPI_CHAR, MPI_ANY_SOURCE,
	      MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
    MPI_Get_count (&stat, MPI_CHAR, &num);
    buf[num] = '\0';			/* add missing end-of-string	*/
    printf ("Greetings from task %d:\n"
	    "  message type:        %d\n"
	    "  msg length:          %d characters\n"
	    "  message:             %s\n\n",
	    stat.MPI_SOURCE, stat.MPI_TAG, num, buf);
  }
  /* terminate all slave tasks						*/
  for (i = 1; i < ntasks; ++i)
  {
    MPI_Send ((char *) NULL, 0, MPI_CHAR, i, EXITTAG, MPI_COMM_WORLD);
  }
}


/* Function for "slave tasks". The slave task sends its hostname,
 * operating system name and release, and processor architecture
 * as a message to the master.
 *
 * input parameters:	not necessary
 * output parameters:	not available
 * return value:	nothing
 * side effects:	no side effects
 *
 */
void slave (void)
{
  struct utsname sys_info;		/* system information		*/
  int		 mytid,		       	/* my task id			*/
		 more_to_do;
  char		 buf[BUF_SIZE];       	/* message buffer      		*/
  MPI_Status	 stat;			/* message details		*/

  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  more_to_do = 1;
  while (more_to_do == 1)
  {
    /* wait for a message from the master task				*/
    MPI_Recv (buf, BUF_SIZE, MPI_CHAR, 0, MPI_ANY_TAG,
	      MPI_COMM_WORLD, &stat);
    if (stat.MPI_TAG != EXITTAG)
    {
      uname (&sys_info);
      strcpy (buf, "\n    hostname:          ");
      strncpy (buf + strlen (buf), sys_info.nodename,
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), "\n    operating system:  ",
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), sys_info.sysname,
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), "\n    release:           ",
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), sys_info.release,
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), "\n    processor:         ",
	       BUF_SIZE - strlen (buf));
      strncpy (buf + strlen (buf), sys_info.machine,
	       BUF_SIZE - strlen (buf));
      MPI_Send (buf, (int) strlen (buf), MPI_CHAR, stat.MPI_SOURCE,
		MSGTAG, MPI_COMM_WORLD);
    }
    else
    {
      more_to_do = 0;			/* terminate			*/
    }
  }
}