Although I do not think that Valgrind supports mpic++, I have tried to run it. 
This is what I got. 
Thanks.
------------------------------------------------------------
==18729== 
Memcheck, a memory error detector==18729== Copyright (C) 2002-2009, and GNU 
GPL'd, by Julian Seward et al.==18729== Using Valgrind-3.5.0 and LibVEX; rerun 
with -h for copyright info==18729== Command: ./nsga2b==18729== Parent PID: 
18726==18729==--18729----18729-- Valgrind options:--18729--    
--tool=memcheck--18729--    --error-limit=no--18729--    
--leak-check=full--18729--    --log-file=nsga2b_valg.log--18729--    
-v--18729-- Contents of /proc/version:--18729--   Linux version 
2.6.18-128.1.1.el5.530g0000 (root@kalamata) (gcc version 4.1.2 20071124 (Red 
Hat 4.1.2-42)) #1 SMP Tue Mar 17 21:49:24 EDT 2009--18729-- Arch and hwcaps: 
AMD64, amd64-sse3-cx16--18729-- Page sizes: currently 4096, max supported 
4096--18729-- Valgrind library directory: /usr/lib64/valgrind--18729-- Reading 
syms from /lustre/nsga2b (0x400000)--18729-- warning: DiCfSI 0x0 .. 0x0 outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x1 .. 0x3 outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x4 .. 0x2a outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x0 .. 0x0 outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x1 .. 0x3 outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x4 .. 0x2a outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x0 .. 0x0 outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x1 .. 0x3 outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0x4 .. 0xb outside 
segment 0x4438f0 .. 0xd81e77--18729-- warning: DiCfSI 0xc .. 0xaa outside 
segment 0x4438f0 .. 0xd81e77--18729-- Reading syms from 
/usr/lib64/valgrind/memcheck-amd64-linux (0x38000000)--18729--    object 
doesn't have a dynamic symbol table--18729-- Reading syms from /lib64/ld-2.5.so 
(0x3f75c00000)--18729-- Reading suppressions file: 
/usr/lib64/valgrind/default.supp--18729-- REDIR: 0x3f75c145d0 (strlen) 
redirected to 0x3803e767 (vgPlain_amd64_linux_REDIR_FOR_strlen)--18729-- 
Reading syms from /usr/lib64/valgrind/vgpreload_core-amd64-linux.so 
(0x4802000)--18729-- Reading syms from 
/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so (0x4a03000)==18729== 
WARNING: new redirection conflicts with existing -- ignoring it--18729--     
new: 0x3f75c145d0 (strlen              ) R-> 0x04a06dc0 strlen--18729-- REDIR: 
0x3f75c143f0 (index) redirected to 0x4a06c30 (index)--18729-- REDIR: 
0x3f75c145a0 (strcmp) redirected to 0x4a06e90 (strcmp)--18729-- Reading syms 
from /opt/openmpi-1.3.4-gnu/lib/libmpi_cxx.so.0.0.0 (0x4c0a000)--18729-- 
Reading syms from /opt/openmpi-1.3.4-gnu/lib/libmpi.so.0.0.1 (0x4e2600
--18729-- Reading syms from /opt/openmpi-1.3.4-gnu/lib/libmpi.so.0.0.1 
(0x4e26000)--18729-- Reading syms from 
/opt/openmpi-1.3.4-gnu/lib/libopen-rte.so.0.0.0 (0x5258000)--18729-- Reading 
syms from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0.0.0 (0x54db000)--18729-- 
Reading syms from /usr/lib64/librdmacm.so.1.0.0 (0x3f77000000)--18729--    
object doesn't have a symbol table--18729-- Reading syms from 
/usr/lib64/libibverbs.so.1.0.0 (0x3f76400000)--18729--    object doesn't have a 
symbol table--18729-- Reading syms from /usr/lib64/libdat.so.1.0.2 
(0x5778000)--18729--    object doesn't have a symbol table--18729-- Reading 
syms from /scratch/torque-2.4.2/lib/libtorque.so.2.0.0 (0x5982000)--18729-- 
Reading syms from /lib64/libdl-2.5.so (0x3f76800000)--18729-- Reading syms from 
/lib64/libnsl-2.5.so (0x3f7fe00000)--18729-- Reading syms from 
/lib64/libutil-2.5.so (0x3f84e00000)--18729-- Reading syms from 
/lib64/libm-2.5.so (0x5c97000)--18729-- Reading syms from 
/usr/lib64/libstdc++.so.6.0.8 (0x3f7c800000)--18729--    object doesn't have a 
symbol table--18729-- Reading syms from /lib64/libgcc_s-4.1.2-20080825.so.1 
(0x3f7b800000)--18729--    object doesn't have a symbol table--18729-- Reading 
syms from /lib64/libpthread-2.5.so (0x3f76c00000)--18729-- Reading syms from 
/lib64/libc-2.5.so (0x3f76000000)--18729-- REDIR: 0x3f7607ae00 (memset) 
redirected to 0x4a07030 (memset)--18729-- REDIR: 0x3f7607c240 (memcpy) 
redirected to 0x4a08030 (memcpy)--18729-- REDIR: 0x3f76079f40 (rindex) 
redirected to 0x4a06ae0 (rindex)--18729-- REDIR: 0x3f76079b50 (strlen) 
redirected to 0x4a06d80 (strlen)--18729-- REDIR: 0x3f76074dc0 (malloc) 
redirected to 0x4a05d9a (malloc)--18729-- REDIR: 0x3f76072870 (free) redirected 
to 0x4a059aa (free)--18729-- REDIR: 0x3f76079e90 (strncpy) redirected to 
0x4a081a0 (strncpy)--18729-- REDIR: 0x3f76079dd0 (strncmp) redirected to 
0x4a06de0 (strncmp)--18729-- REDIR: 0x3f760749e0 (calloc) redirected to 
0x4a05092 (calloc)--18729-- REDIR: 0x3f7c8bd1c0 (operator new(unsigned long)) 
redirected to 0x4a065ea (operator new(unsigned long))--18729-- REDIR: 
0x3f7607b930 (mempcpy) redirected to 0x4a07870 (mempcpy)--18729-- REDIR: 
0xffffffffff600400 (???) redirected to 0x3803e75d 
(vgPlain_amd64_linux_REDIR_FOR_vtime)--18729-- REDIR: 0x3f76079610 (strcpy) 
redirected to 0x4a082c0 (strcpy)--18729-- REDIR: 0x3f760795d0 (strcmp) 
redirected to 0x4a06e50 (strcmp)--18729-- REDIR: 0x3f76079c40 (strnlen) 
redirected to 0x4a06d50 (strnlen)--18729-- REDIR: 0x3f7c8bbf50 (operator 
delete(void*)) redirected to 0x4a056bc (operator delete(void*))--18729-- REDIR: 
0x3f7607a640 (memchr) redirected to 0x4a06f10 (memchr)--18729-- REDIR: 
0x3f76079420 (index) redirected to 0x4a06b70 (index)--18729-- REDIR: 
0x3f7607ac60 (memmove) redirected to 0x4a07080 (memmove)--18729-- REDIR: 
0x3f760752e0 (realloc) redirected to 0x4a05e4b (realloc)
--18729-- REDIR: 0x3f76079d30 (strncat) redirected to 0x4a06c60 
(strncat)--18729-- REDIR: 0x3f76079260 (strcat) redirected to 0x4a07590 
(strcat)--18729-- REDIR: 0x3f7607cea0 (rawmemchr) redirected to 0x4a07110 
(rawmemchr)--18729-- REDIR: 0x3f76074ff0 (memalign) redirected to 0x4a04f6c 
(memalign)--18729-- REDIR: 0xffffffffff600000 (???) redirected to 0x3803e753 
(vgPlain_amd64_linux_REDIR_FOR_vgettimeofday)--18729-- REDIR: 0x3f7607cff0 
(strchrnul) redirected to 0x4a070e0 (strchrnul)--18729-- REDIR: 0x3f7607bf40 
(stpcpy) redirected to 0x4a07b10 (stpcpy)--18729-- Reading syms from 
/lib64/libnss_files-2.5.so (0x794b000)--18729-- Reading syms from 
/lib64/libnss_nis-2.5.so (0x7b56000)--18729-- REDIR: 0x3f76075230 
(posix_memalign) redirected to 0x4a05047 (posix_memalign)--18729-- Reading syms 
from /usr/lib64/libdaplcma.so.1.0.2 (0x8762000)--18729--    object doesn't have 
a symbol table==18729== Invalid write of size 8==18729==    at 0x443BEF: 
initPopPara(population*, std::vector<message_para_to_workersT, 
std::allocator<message_para_to_workersT> >&, initParaType&, int, int, 
std::vector<double, std::allocator<double> >&) 
(main-parallel2.cpp:552)==18729==    by 0x44F12E: main 
(main-parallel2.cpp:204)==18729==  Address 0x62c9da0 is 0 bytes after a block 
of size 0 alloc'd==18729==    at 0x4A0666E: operator new(unsigned long) 
(vg_replace_malloc.c:220)==18729==    by 0x4573E4: void 
std::__uninitialized_fill_n_aux<message_para_to_workersT*, unsigned long, 
message_para_to_workersT>(message_para_to_workersT*, unsigned long, 
message_para_to_workersT const&, __false_type) (new_allocator.h:88)==18729==    
by 0x4576CF: void std::__uninitialized_fill_n_a<message_para_to_workersT*, 
unsigned long, message_para_to_workersT, 
message_para_to_workersT>(message_para_to_workersT*, unsigned long, 
message_para_to_workersT const&, std::allocator<message_para_to_workersT>) 
(stl_uninitialized.h:218)==18729==    by 0x44EE2E: main 
(stl_vector.h:218)==18729====18729== Invalid read of size 8==18729==    at 
0x44F13A: main (main-parallel2.cpp:208)==18729==  Address 0x62c9d60 is 0 bytes 
after a block of size 0 alloc'd==18729==    at 0x4A0666E: operator new(unsigned 
long) (vg_replace_malloc.c:220)==18729==    by 0x45733D: void 
std::__uninitialized_fill_n_aux<message_para_to_workersT*, unsigned long, 
message_para_to_workersT>(message_para_to_workersT*, unsigned long, 
message_para_to_workersT const&, __false_type) (new_allocator.h:88)==18729==    
by 0x4576CF: void std::__uninitialized_fill_n_a<message_para_to_workersT*, 
unsigned long, message_para_to_workersT, 
message_para_to_workersT>(message_para_to_workersT*, unsigned long, 
message_para_to_workersT const&, std::allocator<message_para_to_workersT>) 
(stl_uninitialized.h:218)==18729==    by 0x44EE2E: main 
(stl_vector.h:218)==18729==
valgrind: m_mallocfree.c:225 (mk_plain_bszB): Assertion 'bszB != 0' failed.
valgrind: This is probably caused by your program erroneously writing 
past the end of a heap block and corrupting heap metadata.  If you fix 
any invalid writes reported by Memcheck, this assertion failure will
probably go away.  Please try that before reporting this as a bug.
==18729==    at 0x38029D5C: report_and_quit (m_libcassert.c:145)==18729==    by 
0x3802A032: vgPlain_assert_fail (m_libcassert.c:217)==18729==    by 0x38035645: 
vgPlain_arena_malloc (m_mallocfree.c:225)==18729==    by 0x38002BB5: 
vgMemCheck_new_block (mc_malloc_wrappers.c:199)==18729==    by 0x38002F6B: 
vgMemCheck___builtin_new (mc_malloc_wrappers.c:246)==18729==    by 0x3806070C: 
do_client_request (scheduler.c:1362)==18729==    by 0x38061D30: 
vgPlain_scheduler (scheduler.c:1061)==18729==    by 0x38085E6E: 
run_a_thread_NORETURN (syswrap-linux.c:91)
sched status:  running_tid=1
Thread 1: status = VgTs_Runnable==18729==    at 0x4A0666E: operator 
new(unsigned long) (vg_replace_malloc.c:220)==18729==    by 0x464506: 
__gnu_cxx::new_allocator<int>::allocate(unsigned long, void const*) 
(new_allocator.h:88)==18729==    by 0x46452E: std::_Vector_base<int, 
std::allocator<int> >::_M_allocate(unsigned long) (stl_vector.h:127)==18729==   
 by 0x464560: std::_Vector_base<int, std::allocator<int> 
>::_Vector_base(unsigned long, std::allocator<int> const&) 
(stl_vector.h:113)==18729==    by 0x464B6A: std::vector<int, 
std::allocator<int> >::vector(unsigned long, int const&, std::allocator<int> 
const&) (stl_vector.h:216)==18729==    by 0x488F62: Index::Index() 
(index.cpp:20)==18729==    by 0x489147: ReadFile(char const*) 
(index.cpp:86)==18729==    by 0x48941C: ImportIndices() 
(index.cpp:121)==18729==    by 0x445D00: myNeplanTaskScheduler(CNSGA2*, int, 
int, int, population*, char, int, std::vector<message_para_to_workersT, 
std::allocator<message_para_to_workersT> >&, ompi_datatype_t*, int&, int&, 
std::vector<std::vector<double, std::allocator<double> >, 
std::allocator<std::vector<double, std::allocator<double> > > >&, 
std::vector<std::vector<double, std::allocator<double> >, 
std::allocator<std::vector<double, std::allocator<double> > > >&, 
std::vector<double, std::allocator<double> >&, int, 
std::vector<std::vector<double, std::allocator<double> >, 
std::allocator<std::vector<double, std::allocator<double> > > >&, 
ompi_datatype_t*, int, ompi_datatype_t*, int) 
(myNetplanScheduler.cpp:109)==18729==    by 0x44F2DF: main 
(main-parallel2.cpp:216)

Note: see also the FAQ in the source distribution. It contains workarounds to 
several common problems. In particular, if Valgrind aborted or crashed 
after identifying problems in your program, there's a good chance that fixing 
those problems will prevent Valgrind aborting or crashing, especially if it 
happened in m_mallocfree.c.

------------------------------------------------------------


> Subject: Re: [OMPI users] OMPI seg fault by a class with weird address.
> From: jsquy...@cisco.com
> Date: Wed, 16 Mar 2011 06:43:01 -0400
> To: dtustud...@hotmail.com
> CC: us...@open-mpi.org
> 
> Did you run with a memory checking debugger like Valgrind?
> 
> Sent from my phone. No type good. 
> 
> On Mar 15, 2011, at 8:30 PM, "Jack Bryan" <dtustud...@hotmail.com> wrote:
> 
> > Hi, 
> > 
> > I have installed a new open MPI 1.3.4. 
> > 
> > But I got more weird errors: 
> > 
> > *** glibc detected *** /lustre/nsga2b: malloc(): memory corruption (fast): 
> > 0x000000001cafc450 ***
> > ======= Backtrace: =========
> > /lib64/libc.so.6[0x3c50272aeb]
> > /lib64/libc.so.6(__libc_malloc+0x7a)[0x3c5027402a]
> > /usr/lib64/libstdc++.so.6(_Znwm+0x1d)[0x3c590bd17d]
> > /lustre/jxding/netplan49/nsga2b[0x445bc6]
> > /lustre/jxding/netplan49/nsga2b[0x44f43b]
> > /lib64/libc.so.6(__libc_start_main+0xf4)[0x3c5021d974]
> > /lustre/jxding/netplan49/nsga2b(__gxx_personality_v0+0x499)[0x443909]
> > ======= Memory map: ========
> > 00400000-00f33000 r-xp 00000000 6ac:e3210 685016360                      
> > /lustre/netplan49/nsga2b
> > 01132000-0117e000 rwxp 00b32000 6ac:e3210 685016360                      
> > /lustre/netplan49/nsga2b
> > 0117e000-01188000 rwxp 0117e000 00:00 0
> > 1ca11000-1ca78000 rwxp 1ca11000 00:00 0
> > 1ca78000-1ca79000 rwxp 1ca78000 00:00 0
> > 1ca79000-1ca7a000 rwxp 1ca79000 00:00 0
> > 1ca7a000-1cab8000 rwxp 1ca7a000 00:00 0
> > 1cab8000-1cac7000 rwxp 1cab8000 00:00 0
> > 1cac7000-1cacf000 rwxp 1cac7000 00:00 0
> > 1cacf000-1cad0000 rwxp 1cacf000 00:00 0
> > 1cad0000-1cad1000 rwxp 1cad0000 00:00 0
> > 1cad1000-1cad2000 rwxp 1cad1000 00:00 0
> > 1cad2000-1cada000 rwxp 1cad2000 00:00 0
> > 1cada000-1cadc000 rwxp 1cada000 00:00 0
> > 1cadc000-1cae0000 rwxp 1cadc000 00:00 0
> > 
> > .........................
> > 512600000-3512605000 r-xp 00000000 00:11 12043                          
> > /usr/lib64/librdmacm.so.1
> > 3512605000-3512804000 ---p 00005000 00:11 12043                          
> > /usr/lib64/librdmacm.so.1
> > 3512804000-3512805000 rwxp 00004000 00:11 12043                          
> > /usr/lib64/librdmacm.so.1
> > 3512e00000-3512e0c000 r-xp 00000000 00:11 5545                           
> > /usr/lib64/libibverbs.so.1
> > 3512e0c000-351300b000 ---p 0000c000 00:11 5545                           
> > /usr/lib64/libibverbs.so.1
> > 351300b000-351300c000 rwxp 0000b000 00:11 5545                           
> > /usr/lib64/libibverbs.so.1
> > 3c4f200000-3c4f21c000 r-xp 00000000 00:11 2853                           
> > /lib64/ld-2.5.so
> > 3c4f41b000-3c4f41c000 r-xp 0001b000 00:11 2853                           
> > /lib64/ld-2.5.so
> > 3c4f41c000-3c4f41d000 rwxp 0001c000 00:11 2853                           
> > /lib64/ld-2.5.so
> > 3c50200000-3c5034c000 r-xp 00000000 00:11 897                            
> > /lib64/libc.so.6
> > 3c5034c000-3c5054c000 ---p 0014c000 00:11 897                            
> > /lib64/libc.so.6
> > 3c5054c000-3c50550000 r-xp 0014c000 00:11 897                            
> > /lib64/libc.so.6
> > 3c50550000-3c50551000 rwxp 00150000 00:11 897                            
> > /lib64/libc.so.6
> > 3c50551000-3c50556000 rwxp 3c50551000 00:00 0
> > 3c50600000-3c50682000 r-xp 00000000 00:11 2924                           
> > /lib64/libm.so.6
> > 3c50682000-3c50881000 ---p 00082000 00:11 2924                           
> > /lib64/libm.so.6
> > 3c50881000-3c50882000 r-xp 00081000 00:11 2924                           
> > /lib64/libm.so.6
> > 3c50882000-3c50883000 rwxp 00082000 00:11 2924                           
> > /lib64/libm.so.6
> > 3c50a00000-3c50a02000 r-xp 00000000 00:11 923                            
> > /lib64/libdl.so.2
> > 3c50a02000-3c50c02000 ---p 00002000 00:11 923                            
> > /lib64/libdl.so.2
> > 3c50c02000-3c50c03000 r-xp 00002000 00:11 923                            
> > /lib64/libdl.so.2
> > 3c50c03000-3c50c04000 rwxp 00003000 00:11 923                            
> > /lib64/libdl.so.2
> > 3c50e00000-3c50e16000 r-xp 00000000 00:11 1011                           
> > /lib64/libpthread.so.0
> > 
> > .....................
> > 2ae87b05e000-2ae87b075000 r-xp 00000000 6ac:e3210 686492235              
> > /lustre/mpi_protocol_091117/openmpi134/lib/libmpi_cxx.so.0.0.0
> > 2ae87b075000-2ae87b274000 ---p 00017000 6ac:e3210 686492235              
> > /lustre/mpi_protocol_091117/openmpi134/lib/libmpi_cxx.so.0.0.0
> > 2ae87b274000-2ae87b277000 rwxp 00016000 6ac:e3210 686492235              
> > /lustre/mpi_protocol_091117/openmpi134/lib/libmpi_cxx.so.0.0.0
> >  
> > 
> > 
> > fff2fa38000-7fff2fa4e000 rwxp 7ffffffe9000 00:00 0                      
> > [stack]
> > ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0                  
> > [vdso]
> > [n332:82320] *** Process received signal ***
> > [n332:82320] Signal: Aborted (6)
> > [n332:82320] Signal code:  (-6)
> > [n332:82320] [ 0] /lib64/libpthread.so.0 [0x3c50e0e4c0]
> > [n332:82320] [ 1] /lib64/libc.so.6(gsignal+0x35) [0x3c50230215]
> > [n332:82320] [ 2] /lib64/libc.so.6(abort+0x110) [0x3c50231cc0]
> > [n332:82320] [ 3] /lib64/libc.so.6 [0x3c5026a7fb]
> > [n332:82320] [ 4] /lib64/libc.so.6 [0x3c50272aeb]
> > [n332:82320] [ 5] /lib64/libc.so.6(__libc_malloc+0x7a) [0x3c5027402a]
> > [n332:82320] [ 6] /usr/lib64/libstdc++.so.6(_Znwm+0x1d) [0x3c590bd17d]
> > [n332:82320] [ 7] /lustre/jxding/netplan49/nsga2b [0x445bc6]
> > [n332:82320] [ 8] /lustre/jxding/netplan49/nsga2b [0x44f43b]
> > [n332:82320] [ 9] /lib64/libc.so.6(__libc_start_main+0xf4) [0x3c5021d974]
> > [n332:82320] [10] /lustre/nsga2b(__gxx_personality_v0+0x499) [0x443909]
> > [n332:82320] *** End of error message ***
> > =>> PBS: job killed: walltime 117 exceeded limit 90
> > mpirun: killing job...
> > 
> > 
> > 
> > 
> > > Subject: Re: [OMPI users] OMPI seg fault by a class with weird address.
> > > From: jsquy...@cisco.com
> > > Date: Tue, 15 Mar 2011 12:50:41 -0400
> > > CC: us...@open-mpi.org
> > > To: dtustud...@hotmail.com
> > > 
> > > You can:
> > > 
> > > mpirun -np 4 valgrind ./my_application
> > > 
> > > That is, you run 4 copies of valgrind, each with one instance of 
> > > ./my_application. Then you'll get valgrind reports for your applications. 
> > >  You might want to dig into the valgrind command line options to have it 
> > > dump the results to files with unique prefixes (e.g., PID and/or 
> > > hostname) so that you can get a unique report from each process.
> > > 
> > > If you disabled ptmalloc and you're still getting the same error, then it 
> > > sounds like an application error. Check out and see what valgrind tells 
> > > you.
> > > 
> > > 
> > > 
> > > On Mar 15, 2011, at 11:25 AM, Jack Bryan wrote:
> > > 
> > > > Thanks,
> > > > 
> > > > From http://valgrind.org/docs/manual/mc-manual.html#mc-manual.mpiwrap
> > > > 
> > > > I find that 
> > > > 
> > > > "Currently the wrappers are only buildable with mpiccs which are based 
> > > > on GNU GCC or Intel's C++ Compiler."
> > > > 
> > > > The cluster which I am working on is using GNU Open MPI mpic++. I am 
> > > > afraid that the Valgrind wrapper cannot work here. 
> > > > 
> > > > I do not have system administrator authorization. 
> > > > 
> > > > Are there other mem-checkers (open source) that can do this ?
> > > > 
> > > > thanks
> > > > 
> > > > Jack
> > > > 
> > > > > Subject: Re: [OMPI users] OMPI seg fault by a class with weird 
> > > > > address.
> > > > > From: jsquy...@cisco.com
> > > > > Date: Tue, 15 Mar 2011 06:19:53 -0400
> > > > > CC: dtustud...@hotmail.com
> > > > > To: us...@open-mpi.org
> > > > > 
> > > > > You may also want to run your program through a memory-checking 
> > > > > debugger such as valgrind to see if it turns up any other problems.
> > > > > 
> > > > > > AFAIK, ptmalloc should be fine for use with STL vector allocation.
> > > > > 
> > > > > 
> > > > > On Mar 15, 2011, at 4:00 AM, Belaid MOA wrote:
> > > > > 
> > > > > > Hi Jack,
> > > > > > > I may need to see the whole code to decide, but my quick look 
> > > > > > > suggests that ptmalloc is causing a problem with STL-vector 
> > > > > > allocation. ptmalloc is the openMPI internal malloc library. Could 
> > > > > > you try to build openMPI without memory management (using 
> > > > > > --without-memory-manager) and let us know the outcome. ptmalloc is 
> > > > > > not needed if you are not using an RDMA interconnect.
> > > > > > 
> > > > > > With best regards,
> > > > > > -Belaid.
> > > > > > 
> > > > > > From: dtustud...@hotmail.com
> > > > > > To: belaid_...@hotmail.com; us...@open-mpi.org
> > > > > > Subject: RE: [OMPI users] OMPI seg fault by a class with weird 
> > > > > > address.
> > > > > > Date: Tue, 15 Mar 2011 00:30:19 -0600
> > > > > > 
> > > > > > Hi, 
> > > > > > 
> > > > > > Because the code is very long, I just show the calling relationship 
> > > > > > of functions. 
> > > > > > 
> > > > > > main()
> > > > > > {
> > > > > > scheduler();
> > > > > > 
> > > > > > }
> > > > > > scheduler()
> > > > > > {
> > > > > > ImportIndices();
> > > > > > }
> > > > > > 
> > > > > > ImportIndices()
> > > > > > {
> > > > > > Index IdxNode ;
> > > > > > IdxNode = ReadFile("fileName");
> > > > > > }
> > > > > > 
> > > > > > Index ReadFile(const char* fileinput) 
> > > > > > {
> > > > > > Index TempIndex;
> > > > > > .........
> > > > > > 
> > > > > > }
> > > > > > 
> > > > > > vector<int> Index::GetPosition() const { return Position; }
> > > > > > vector<int> Index::GetColumn() const { return Column; }
> > > > > > vector<int> Index::GetYear() const { return Year; }
> > > > > > vector<string> Index::GetName() const { return Name; }
> > > > > > int Index::GetPosition(const int idx) const { return Position[idx]; 
> > > > > > }
> > > > > > int Index::GetColumn(const int idx) const { return Column[idx]; }
> > > > > > int Index::GetYear(const int idx) const { return Year[idx]; }
> > > > > > string Index::GetName(const int idx) const { return Name[idx]; }
> > > > > > int Index::GetSize() const { return Position.size(); }
> > > > > > 
> > > > > > The sequential code works well, and there is no scheduler(). 
> > > > > > 
> > > > > > The parallel code output from gdb:
> > > > > > ----------------------------------------------
> > > > > > Breakpoint 1, myNeplanTaskScheduler(CNSGA2 *, int, int, int, ._85 
> > > > > > *, char, int, message_para_to_workers_VecT &, MPI_Datatype, int &, 
> > > > > > int &, std::vector<std::vector<double, std::allocator<double> >, 
> > > > > > std::allocator<std::vector<double, std::allocator<double> > > > &, 
> > > > > > std::vector<std::vector<double, std::allocator<double> >, 
> > > > > > std::allocator<std::vector<double, std::allocator<double> > > > &, 
> > > > > > std::vector<double, std::allocator<double> > &, int, 
> > > > > > std::vector<std::vector<double, std::allocator<double> >, 
> > > > > > std::allocator<std::vector<double, std::allocator<double> > > > &, 
> > > > > > MPI_Datatype, int, MPI_Datatype, int) (nsga2=0x118c490, 
> > > > > > popSize=<value optimized out>, nodeSize=<value optimized out>, 
> > > > > > myRank=<value optimized out>, myChildpop=0x1208d80, genCandTag=65 
> > > > > > 'A', 
> > > > > > generationNum=1, myPopParaVec=std::vector of length 4, capacity 4 = 
> > > > > > {...}, 
> > > > > > message_to_master_type=0x7fffffffd540, myT1Flag=@0x7fffffffd68c, 
> > > > > > myT2Flag=@0x7fffffffd688, 
> > > > > > resultTaskPackageT1=std::vector of length 4, capacity 4 = {...}, 
> > > > > > resultTaskPackageT2Pr=std::vector of length 4, capacity 4 = {...}, 
> > > > > > xdataV=std::vector of length 4, capacity 4 = {...}, objSize=7, 
> > > > > > resultTaskPackageT12=std::vector of length 4, capacity 4 = {...}, 
> > > > > > xdata_to_workers_type=0x121c410, myGenerationNum=1, 
> > > > > > Mpara_to_workers_type=0x121b9b0, nconNum=0)
> > > > > > at src/nsga2/myNetplanScheduler.cpp:109
> > > > > > 109 ImportIndices();
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 2, ImportIndices () at src/index.cpp:120
> > > > > > 120 IdxNode = ReadFile("prepdata/idx_node.csv");
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 4, ReadFile (fileinput=0xd8663d "prepdata/idx_node.csv")
> > > > > > at src/index.cpp:86
> > > > > > 86 Index TempIndex;
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 5, Index::Index (this=0x7fffffffcb80) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Program received signal SIGSEGV, Segmentation fault.
> > > > > > 0x00002aaaab3b0b81 in opal_memory_ptmalloc2_int_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > > 
> > > > > > ---------------------------------------
> > > > > > the backtrace output from the above parallel OpenMPI code:
> > > > > > 
> > > > > > (gdb) bt
> > > > > > #0 0x00002aaaab3b0b81 in opal_memory_ptmalloc2_int_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > > #1 0x00002aaaab3b2bd3 in opal_memory_ptmalloc2_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > > #2 0x0000003f7c8bd1dd in operator new(unsigned long) ()
> > > > > > from /usr/lib64/libstdc++.so.6
> > > > > > #3 0x00000000004646a7 in __gnu_cxx::new_allocator<int>::allocate (
> > > > > > this=0x7fffffffcb80, __n=0)
> > > > > > at 
> > > > > > /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/ext/new_allocator.h:88
> > > > > > #4 0x00000000004646cf in std::_Vector_base<int, std::allocator<int> 
> > > > > > >::_M_allocate (this=0x7fffffffcb80, __n=0)
> > > > > > at 
> > > > > > /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/bits/stl_vector.h:127
> > > > > > #5 0x0000000000464701 in std::_Vector_base<int, std::allocator<int> 
> > > > > > >::_Vector_base (this=0x7fffffffcb80, __n=0, __a=...)
> > > > > > at 
> > > > > > /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/bits/stl_vector.h:113
> > > > > > #6 0x0000000000464d0b in std::vector<int, std::allocator<int> 
> > > > > > >::vector (
> > > > > > this=0x7fffffffcb80, __n=0, __value=@0x7fffffffc968, __a=...)
> > > > > > at 
> > > > > > /usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../include/c++/4.1.2/bits/stl_vector.h:216
> > > > > > #7 0x00000000004890d7 in Index::Index (this=0x7fffffffcb80)
> > > > > > ---Type <return> to continue, or q <return> to quit---
> > > > > > at src/index.cpp:20
> > > > > > #8 0x000000000048927a in ReadFile (fileinput=0xd8663d 
> > > > > > "prepdata/idx_node.csv")
> > > > > > at src/index.cpp:86
> > > > > > #9 0x0000000000489533 in ImportIndices () at src/index.cpp:120
> > > > > > #10 0x0000000000445e0e in myNeplanTaskScheduler(CNSGA2 *, int, int, 
> > > > > > int, ._85 *, char, int, message_para_to_workers_VecT &, 
> > > > > > MPI_Datatype, int &, int &, std::vector<std::vector<double, 
> > > > > > std::allocator<double> >, std::allocator<std::vector<double, 
> > > > > > std::allocator<double> > > > &, std::vector<std::vector<double, 
> > > > > > std::allocator<double> >, std::allocator<std::vector<double, 
> > > > > > std::allocator<double> > > > &, std::vector<double, 
> > > > > > std::allocator<double> > &, int, std::vector<std::vector<double, 
> > > > > > std::allocator<double> >, std::allocator<std::vector<double, 
> > > > > > std::allocator<double> > > > &, MPI_Datatype, int, MPI_Datatype, 
> > > > > > int) (nsga2=0x118c490, 
> > > > > > popSize=<value optimized out>, nodeSize=<value optimized out>, 
> > > > > > myRank=<value optimized out>, myChildpop=0x1208d80, genCandTag=65 
> > > > > > 'A', 
> > > > > > generationNum=1, myPopParaVec=std::vector of length 4, capacity 4 = 
> > > > > > {...}, 
> > > > > > message_to_master_type=0x7fffffffd540, myT1Flag=@0x7fffffffd68c, 
> > > > > > myT2Flag=@0x7fffffffd688, 
> > > > > > resultTaskPackageT1=std::vector of length 4, capacity 4 = {...}, 
> > > > > > resultTaskPackageT2Pr=std::vector of length 4, capacity 4 = {...}, 
> > > > > > xdataV=std::vector of length 4, capacity 4 = {...}, objSize=7, 
> > > > > > resultTaskPackageT12=std::vector of length 4, capacity 4 = {...}, 
> > > > > > xdata_to_workers_type=0x121c410, myGenerationNum=1, 
> > > > > > Mpara_to_workers_type=0x121b9b0, nconNum=0)
> > > > > > ---Type <return> to continue, or q <return> to quit---
> > > > > > at src/nsga2/myNetplanScheduler.cpp:109
> > > > > > #11 0x000000000044f44b in main (argc=1, argv=0x7fffffffd998)
> > > > > > at src/nsga2/main-parallel2.cpp:216
> > > > > > ----------------------------------------------------
> > > > > > 
> > > > > > What is "opal_memory_ptmalloc2_int_malloc ()" ?
> > > > > > 
> > > > > > The gdb output from sequential code: 
> > > > > > -------------------------------------
> > > > > > Breakpoint 1, main (argc=<value optimized out>, argv=<value 
> > > > > > optimized out>)
> > > > > > at src/nsga2/main-seq.cpp:32
> > > > > > 32 ImportIndices();
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 2, ImportIndices () at src/index.cpp:115
> > > > > > 115 IdxNode = ReadFile("prepdata/idx_node.csv");
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 4, ReadFile (fileinput=0xd6bb9d "prepdata/idx_node.csv")
> > > > > > at src/index.cpp:86
> > > > > > 86 Index TempIndex;
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 5, Index::Index (this=0x7fffffffd6d0) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 4, ReadFile (fileinput=0xd6bbb3 "prepdata/idx_ud.csv")
> > > > > > at src/index.cpp:86
> > > > > > 86 Index TempIndex;
> > > > > > (gdb) bt
> > > > > > #0 ReadFile (fileinput=0xd6bbb3 "prepdata/idx_ud.csv") at 
> > > > > > src/index.cpp:86
> > > > > > #1 0x0000000000471cc9 in ImportIndices () at src/index.cpp:116
> > > > > > #2 0x000000000043bba6 in main (argc=<value optimized out>, 
> > > > > > argv=<value optimized out>) at src/nsga2/main-seq.cpp:32
> > > > > > 
> > > > > > --------------------------------------
> > > > > > thanks
> > > > > > 
> > > > > > 
> > > > > > From: belaid_...@hotmail.com
> > > > > > To: us...@open-mpi.org; dtustud...@hotmail.com
> > > > > > Subject: RE: [OMPI users] OMPI seg fault by a class with weird 
> > > > > > address.
> > > > > > Date: Tue, 15 Mar 2011 06:16:35 +0000
> > > > > > 
> > > > > > Hi Jack,
> > > > > > 1- Where is your main function to see how you called your class?
> > > > > > 2- I do not see the implementation of GetPosition, GetName, etc.?
> > > > > > 
> > > > > > With best regards,
> > > > > > -Belaid.
> > > > > > 
> > > > > > 
> > > > > > From: dtustud...@hotmail.com
> > > > > > To: us...@open-mpi.org
> > > > > > Date: Mon, 14 Mar 2011 19:04:12 -0600
> > > > > > Subject: [OMPI users] OMPI seg fault by a class with weird address.
> > > > > > 
> > > > > > Hi, 
> > > > > > 
> > > > > > > I got a run-time error from an Open MPI C++ program. 
> > > > > > 
> > > > > > The following output is from gdb: 
> > > > > > 
> > > > > > --------------------------------------------------------------------------
> > > > > > Program received signal SIGSEGV, Segmentation fault.
> > > > > > 0x00002aaaab3b0b81 in opal_memory_ptmalloc2_int_malloc ()
> > > > > > from /opt/openmpi-1.3.4-gnu/lib/libopen-pal.so.0
> > > > > > 
> > > > > > At the point 
> > > > > > 
> > > > > > Breakpoint 9, Index::Index (this=0x7fffffffcb80) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > 
> > > > > > > The Index constructor has been called before this point with no problem:
> > > > > > -------------------------------------------------------
> > > > > > Breakpoint 9, Index::Index (this=0x117d800) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > 
> > > > > > Breakpoint 9, Index::Index (this=0x117d860) at src/index.cpp:20
> > > > > > 20 Name(0) {}
> > > > > > (gdb) c
> > > > > > Continuing.
> > > > > > ----------------------------------------------------------------------------
> > > > > > 
> > > > > > It seems that the 0x7fffffffcb80 address is a problem. 
> > > > > > 
> > > > > > > But I do not know the reason or how to remove the bug. 
> > > > > > 
> > > > > > Any help is really appreciated. 
> > > > > > 
> > > > > > thanks
> > > > > > 
> > > > > > the following is the index definition.
> > > > > > 
> > > > > > ---------------------------------------------------------
> > > > > > class Index {
> > > > > > public:
> > > > > > Index();
> > > > > > Index(const Index& rhs);
> > > > > > ~Index();
> > > > > > Index& operator=(const Index& rhs);
> > > > > > 
> > > > > > vector<int> GetPosition() const;
> > > > > > vector<int> GetColumn() const;
> > > > > > vector<int> GetYear() const;
> > > > > > vector<string> GetName() const;
> > > > > > int GetPosition(const int idx) const;
> > > > > > int GetColumn(const int idx) const;
> > > > > > int GetYear(const int idx) const;
> > > > > > string GetName(const int idx) const;
> > > > > > int GetSize() const;
> > > > > > 
> > > > > > void Add(const int idx, const int col, const string& name);
> > > > > > void Add(const int idx, const int col, const int year, const 
> > > > > > string& name);
> > > > > > void Add(const int idx, const Step& col, const string& name);
> > > > > > void WriteFile(const char* fileinput) const;
> > > > > > 
> > > > > > private:
> > > > > > vector<int> Position;
> > > > > > vector<int> Column;
> > > > > > vector<int> Year;
> > > > > > vector<string> Name;
> > > > > > };
> > > > > > > // Constructors and destructor for the Index class
> > > > > > Index::Index() :
> > > > > > Position(0),
> > > > > > Column(0),
> > > > > > Year(0),
> > > > > > Name(0) {}
> > > > > > 
> > > > > > Index::Index(const Index& rhs) :
> > > > > > Position(rhs.GetPosition()),
> > > > > > Column(rhs.GetColumn()),
> > > > > > Year(rhs.GetYear()),
> > > > > > Name(rhs.GetName()) {}
> > > > > > 
> > > > > > Index::~Index() {}
> > > > > > 
> > > > > > Index& Index::operator=(const Index& rhs) {
> > > > > > Position = rhs.GetPosition();
> > > > > > Column = rhs.GetColumn(),
> > > > > > Year = rhs.GetYear(),
> > > > > > Name = rhs.GetName();
> > > > > > return *this;
> > > > > > }
> > > > > > ----------------------------------------------------------
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > _______________________________________________ users mailing list 
> > > > > > us...@open-mpi.org 
> > > > > > http://www.open-mpi.org/mailman/listinfo.cgi/users
> > > > > > _______________________________________________
> > > > > > users mailing list
> > > > > > us...@open-mpi.org
> > > > > > http://www.open-mpi.org/mailman/listinfo.cgi/users
> > > > > 
> > > > > 
> > > > > -- 
> > > > > Jeff Squyres
> > > > > jsquy...@cisco.com
> > > > > For corporate legal information go to:
> > > > > http://www.cisco.com/web/about/doing_business/legal/cri/
> > > > > 
> > > 
> > > 
> > > -- 
> > > Jeff Squyres
> > > jsquy...@cisco.com
> > > For corporate legal information go to:
> > > http://www.cisco.com/web/about/doing_business/legal/cri/
> > > 
                                          

Reply via email to