Hi,

A few weeks ago I reported a problem with my matrix multiplication
program in a heterogeneous environment (little-endian and big-endian
machines). The problem occurs in openmpi-1.6.x, openmpi-1.7, and
openmpi-1.9. I have now implemented a small program that only
scatters the columns of an integer matrix, so that it is easier to
see what goes wrong. I configured for a heterogeneous environment;
adding "-hetero-nodes" and/or "-hetero-apps" on the command line
doesn't change much, as you can see at the end of this email.
Everything works fine if I use only little-endian or only big-endian
machines. Is it possible to fix the problem? Otherwise, do you know
in which file(s) I would have to look for the problem, or which
debug switches would provide more information to solve it?
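
For quick reference, the essential part of the attached test program
(column_int.c, complete listing at the end of this email) reduces to
the condensed sketch below; it only leaves out the process-count
check and the matrix printout, and it must be started with six
processes (mpiexec -np 6 ...):

#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

#define P 4                             /* # of rows            */
#define Q 6                             /* # of columns         */

int main (int argc, char *argv[])
{
  int          mytid, i, matrix[P][Q], column[P];
  MPI_Datatype column_t, tmp_column_t;

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  /* one column = P ints with a stride of Q ints; resize the extent
   * to one int so that consecutive columns start one element apart
   */
  MPI_Type_vector (P, 1, Q, MPI_INT, &tmp_column_t);
  MPI_Type_create_resized (tmp_column_t, 0, sizeof (int), &column_t);
  MPI_Type_commit (&column_t);
  MPI_Type_free (&tmp_column_t);
  if (mytid == 0)                       /* initialize matrix    */
  {
    for (i = 0; i < P * Q; ++i)
    {
      matrix[i / Q][i % Q] = 0x12345678;
    }
  }
  MPI_Scatter (matrix, 1, column_t, column, P, MPI_INT, 0,
               MPI_COMM_WORLD);
  printf ("Column of process %d:", mytid);
  for (i = 0; i < P; ++i)
  {
    printf ("  %#x", column[i]);
  }
  printf ("\n");
  MPI_Type_free (&column_t);
  MPI_Finalize ();
  return EXIT_SUCCESS;
}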

I used the following command to configure the package on my "Solaris
10 Sparc" system (the commands for my other systems are similar).
Next time I will also add "-without-sctp" to get rid of the failures
on my Linux machines (openSUSE 12.1).

../openmpi-1.9a1r27668/configure --prefix=/usr/local/openmpi-1.9_64_cc \
  --libdir=/usr/local/openmpi-1.9_64_cc/lib64 \
  --with-jdk-bindir=/usr/local/jdk1.7.0_07/bin/sparcv9 \
  --with-jdk-headers=/usr/local/jdk1.7.0_07/include \
  JAVA_HOME=/usr/local/jdk1.7.0_07 \
  LDFLAGS="-m64" \
  CC="cc" CXX="CC" FC="f95" \
  CFLAGS="-m64" CXXFLAGS="-m64 -library=stlport4" FCFLAGS="-m64" \
  CPP="cpp" CXXCPP="cpp" \
  CPPFLAGS="" CXXCPPFLAGS="" \
  C_INCL_PATH="" C_INCLUDE_PATH="" CPLUS_INCLUDE_PATH="" \
  OBJC_INCLUDE_PATH="" OPENMPI_HOME="" \
  --enable-cxx-exceptions \
  --enable-mpi-java \
  --enable-heterogeneous \
  --enable-opal-multi-threads \
  --enable-mpi-thread-multiple \
  --with-threads=posix \
  --with-hwloc=internal \
  --without-verbs \
  --without-udapl \
  --with-wrapper-cflags=-m64 \
  --enable-debug \
  |& tee log.configure.$SYSTEM_ENV.$MACHINE_ENV.64_cc



tyr small_prog 501 ompi_info | grep -e Ident -e Hetero -e "Built on"
            Ident string: 1.9a1r27668
                Built on: Wed Dec 12 09:00:13 CET 2012
   Heterogeneous support: yes
tyr small_prog 502 


tyr small_prog 488 mpiexec -np 6 -host sunpc0,rs0 column_int

matrix:

0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  


Column of process 1:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 2:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 3:
0x56780000  0x12340000  0x5678ffff  0x1234ce71  

Column of process 4:
0x56780000  0x12340000  0x5678ffff  0x1234ce71  

Column of process 0:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 5:
0x56780000  0x12340000  0x5678ffff  0x1234ce71  
tyr small_prog 489 




tyr small_prog 489 mpiexec -np 6 -host rs0,sunpc0 column_int

matrix:

0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  


Column of process 1:

Column of process 2:
0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 3:
0xffdf1234  0xffff5678  0x401234  0x5678  

Column of process 4:
0xffdf1234  0xffff5678  0x401234  0x5678  

Column of process 0:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 5:
0xffdf1234  0xffff5678  0x401234  0x5678  
tyr small_prog 490 




tyr small_prog 491 mpiexec -np 6 -mca btl ^sctp -host rs0,linpc0 column_int

matrix:

0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  


Column of process 1:

Column of process 2:
0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 3:
0x1234  0x5678  0xf71c1234  0x5678  

Column of process 4:
0x1234  0x5678  0xc6011234  0x5678  

Column of process 0:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 5:
0x1234  0x5678  0x426f1234  0x5678  
tyr small_prog 492 




tyr small_prog 492 mpiexec -np 6 -mca btl ^sctp -host linpc0,rs0 column_int

matrix:

0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  


Column of process 2:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 1:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 3:
0x56780000  0x12340000  0x5678ffff  0x1234ce51  

Column of process 4:
0x56780000  0x12340000  0x5678ffff  0x1234ce51  

Column of process 0:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 5:
0x56780000  0x12340000  0x5678ffff  0x1234ce51  
tyr small_prog 493 



tyr small_prog 498 mpiexec -np 6 -mca btl ^sctp -hetero-nodes \
  -host linpc0,rs0 column_int

matrix:

0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  


Column of process 1:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 2:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 3:
0x56780000  0x12340000  0x5678ffff  0x1234ce31  

Column of process 4:
0x56780000  0x12340000  0x5678ffff  0x1234ce31  

Column of process 0:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 5:
0x56780000  0x12340000  0x5678ffff  0x1234ce31  
tyr small_prog 499 



tyr small_prog 499 mpiexec -np 6 -mca btl ^sctp -hetero-nodes \
  -hetero-apps -host linpc0,rs0 column_int

matrix:

0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  


Column of process 1:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 2:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 3:
0x56780000  0x12340000  0x5678ffff  0x1234ce11  

Column of process 4:
0x56780000  0x12340000  0x5678ffff  0x1234ce11  

Column of process 0:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 5:
0x56780000  0x12340000  0x5678ffff  0x1234ce11  
tyr small_prog 500 



tyr small_prog 500 mpiexec -np 6 -mca btl ^sctp -hetero-apps \
  -host linpc0,rs0 column_int

matrix:

0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  
0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  0x12345678  


Column of process 2:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 1:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 3:
0x56780000  0x12340000  0x5678ffff  0x1234ce31  

Column of process 4:
0x56780000  0x12340000  0x5678ffff  0x1234ce31  

Column of process 0:
0x12345678  0x12345678  0x12345678  0x12345678  

Column of process 5:
0x56780000  0x12340000  0x5678ffff  0x1234ce31  
tyr small_prog 501 


Thank you very much in advance for any help.


Kind regards

Siegmar
/* Small program that creates and prints column vectors of a matrix.
 *
 * An MPI datatype is defined by its size, its contents, and its
 * extent. When multiple elements of the same datatype are used in a
 * contiguous manner (e.g., in a "scatter" operation or an operation
 * with "count" greater than one), the extent is used to compute
 * where the next element starts. By default the extent of a derived
 * datatype reaches from its first byte to its last byte, so the
 * first element of the next item starts only after the last element
 * of the previous one, i.e., you have to "resize" the new datatype
 * if you want to send it multiple times (count > 1) or to
 * scatter/gather it to many processes. For a strided column vector
 * the extent must be restricted in such a way that the datatype
 * looks like just one element when it is used with "count > 1" or
 * in a scatter/gather operation.
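 *
 * As a concrete example (using the dimensions defined below, P = 4
 * rows and Q = 6 columns, and assuming a 4-byte "int"):
 * MPI_Type_vector (P, 1, Q, MPI_INT, ...) describes one column of
 * the matrix. Its size is P * 4 = 16 bytes, but its default extent
 * is ((P - 1) * Q + 1) * 4 = 76 bytes, so in a scatter operation
 * the column for the second process would be taken 76 bytes after
 * the first one. After MPI_Type_create_resized (..., 0,
 * sizeof (int), ...) the extent is only 4 bytes, i.e., one matrix
 * element, so consecutive columns start one "int" apart, which is
 * exactly what scattering the columns requires.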
 *
 *
 * Compiling:
 *   Store executable(s) into local directory.
 *     mpicc -o <program name> <source code file name>
 *
 *   Store executable(s) into predefined directories.
 *     make
 *
 *   Make program(s) automatically on all specified hosts. You must
 *   edit the file "make_compile" and specify your host names before
 *   you execute it.
 *     make_compile
 *
 * Running:
 *   LAM-MPI:
 *     mpiexec -boot -np <number of processes> <program name>
 *     or
 *     mpiexec -boot \
 *	 -host <hostname> -np <number of processes> <program name> : \
 *	 -host <hostname> -np <number of processes> <program name>
 *     or
 *     mpiexec -boot [-v] -configfile <application file>
 *     or
 *     lamboot [-v] [<host file>]
 *       mpiexec -np <number of processes> <program name>
 *	 or
 *	 mpiexec [-v] -configfile <application file>
 *     lamhalt
 *
 *   OpenMPI:
 *     "host1", "host2", and so on can all have the same name,
 *     if you want to start a virtual computer with some virtual
 *     CPUs on the local host. The name "localhost" is allowed
 *     as well.
 *
 *     mpiexec -np <number of processes> <program name>
 *     or
 *     mpiexec --host <host1,host2,...> \
 *	 -np <number of processes> <program name>
 *     or
 *     mpiexec -hostfile <hostfile name> \
 *	 -np <number of processes> <program name>
 *     or
 *     mpiexec -app <application file>
 *
 * Cleaning:
 *   local computer:
 *     rm <program name>
 *     or
 *     make clean_all
 *   on all specified computers (you must edit the file "make_clean_all"
 *   and specify your host names before you execute it).
 *     make_clean_all
 *
 *
 * File: column_int.c			Author: S. Gross
 * Date: 14.12.2012
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

#define	P		  4		/* # of rows			*/
#define Q		  6		/* # of columns			*/
#define NUM_ELEM_PER_LINE 6		/* to print a vector		*/


int main (int argc, char *argv[])
{
  int ntasks,				/* number of parallel tasks	*/
      mytid,				/* my task id			*/
      i, j,				/* loop variables		*/
      matrix[P][Q],
      column[P],
      tmp;				/* temporary value		*/
  MPI_Datatype	column_t,		/* column type (strided vector)	*/
		tmp_column_t;		/* needed to resize the extent	*/

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &mytid);
  MPI_Comm_size (MPI_COMM_WORLD, &ntasks);
  /* check that we have the correct number of processes in our universe	*/
  if (mytid == 0)
  {
    if (ntasks != Q)
    {
      fprintf (stderr, "\n\nI need exactly %d processes.\n\n"
	       "Usage:\n"
	       "  mpiexec -np %d %s\n\n", Q, Q, argv[0]);
    }
  }
  if (ntasks != Q)
  {
    MPI_Finalize ();
    exit (EXIT_SUCCESS);
  }
  /* Build the new type for a strided vector and resize the extent
   * of the new datatype in such a way that the extent of the whole
   * column looks like just one element.
   */
  MPI_Type_vector (P, 1, Q, MPI_INT, &tmp_column_t);
  MPI_Type_create_resized (tmp_column_t, 0, sizeof (int), &column_t);
  MPI_Type_commit (&column_t);
  MPI_Type_free (&tmp_column_t);
  if (mytid == 0)
  {
    tmp = 1;
    for (i = 0; i < P; ++i)		/* initialize matrix		*/
    {
      for (j = 0; j < Q; ++j)
      {
	/*	matrix[i][j] = tmp++; */
	matrix[i][j] = 0x12345678;
      }
    }
    printf ("\nmatrix:\n\n");		/* print matrix			*/
    for (i = 0; i < P; ++i)
    {
      for (j = 0; j < Q; ++j)
      {
	printf ("%#x  ", matrix[i][j]);
      }
      printf ("\n");
    }
    printf ("\n");
  }
  MPI_Scatter (matrix, 1, column_t, column, P, MPI_INT, 0,
	       MPI_COMM_WORLD);
  /* Each process prints its column. The output will intermingle on
   * the screen so that you must use "-output-filename" in Open MPI.
   */
  printf ("\nColumn of process %d:\n", mytid);
  for (i = 0; i < P; ++i)
  {
    if (((i + 1) % NUM_ELEM_PER_LINE) == 0)
    {
      printf ("%#x\n", column[i]);
    }
    else
    {
      printf ("%#x  ", column[i]);
    }
  }
  printf ("\n");
  MPI_Type_free (&column_t);
  MPI_Finalize ();
  return EXIT_SUCCESS;
}
