[ 
https://issues.apache.org/jira/browse/KUDU-2654?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Deng ZiWu updated KUDU-2654:
----------------------------
    Description: 
I have a Kudu cluster.

The tserver config looks like this:

 

--fs_wal_dir=/data/kuduTabletDir

--fs_data_dirs=/data/kuduTabletDir

--log_dir=/data/kuduTabletLogDir

 

On the tserver:

cd /data/kuduTabletDir/data

ll | wc -l

1962

cd /data/kuduTabletDir/tablet-meta

ll | wc -l

233

 

du -sh /data/kuduTabletDir/data

16G /data/kuduTabletDir/data

 

When I stop and start the tserver:

 

I0106 22:59:15.685855  2430 fs_manager.cc:256] Time spent opening block 
manager: real 449.267s user 0.000s sys 0.000s

 

It took almost 8 minutes to finish opening the block manager, even though there is only 16G of data and the disk is an SSD.

 

In a larger cluster, it can take hours to start.

 

The pstack result looks like this:

#0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0

#1  0x00000000019638f1 in kudu::AsyncLogger::RunThread() ()

#2  0x00007fa6727d22b0 in ?? () from /lib64/libstdc++.so.6

#3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 8 (Thread 0x7fa6700cd700 (LWP 2434)):

#0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0

#1  0x00000000019638f1 in kudu::AsyncLogger::RunThread() ()

#2  0x00007fa6727d22b0 in ?? () from /lib64/libstdc++.so.6

#3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 7 (Thread 0x7fa66f8cc700 (LWP 2435)):

#0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0

#1  0x00000000019638f1 in kudu::AsyncLogger::RunThread() ()

#2  0x00007fa6727d22b0 in ?? () from /lib64/libstdc++.so.6

#3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 6 (Thread 0x7fa66f0cb700 (LWP 2436)):

#0  0x00007fa673052cf2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0

#1  0x00000000018cc71e in kudu::ConditionVariable::TimedWait(kudu::MonoDelta 
const&) const ()

#2  0x00000000019067db in kudu::KernelStackWatchdog::RunThread() ()

#3  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()

#4  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#5  0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 5 (Thread 0x7fa66e8ca700 (LWP 2437)):

#0  0x00007fa673056371 in sigwait () from /lib64/libpthread.so.0

#1  0x0000000001923ae3 in 
kudu::MinidumpExceptionHandler::RunUserSignalHandlerThread() ()

#2  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()

#3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 4 (Thread 0x7fa66e0c9700 (LWP 2438)):

#0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0

#1  0x00000000019552e5 in kudu::ThreadPool::DispatchThread(bool) ()

#2  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()

#3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 3 (Thread 0x7fa66d6b5700 (LWP 2439)):

#0  0x00007fa673052cf2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0

#1  0x00000000018cc71e in kudu::ConditionVariable::TimedWait(kudu::MonoDelta 
const&) const ()

#2  0x00000000018f7ffc in kudu::FileCache<kudu::RWFile>::RunDescriptorExpiry() 
()

#3  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()

#4  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#5  0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 2 (Thread 0x7fa66ceb4700 (LWP 2440)):

#0  0x00007fa673055f73 in pread64 () from /lib64/libpthread.so.0

#1  0x00000000018e02fe in kudu::(anonymous 
namespace)::PosixRandomAccessFile::Read(unsigned long, unsigned long, 
kudu::Slice*, unsigned char*) const ()

#2  0x00000000018ebedf in kudu::env_util::ReadFully(kudu::RandomAccessFile*, 
unsigned long, unsigned long, kudu::Slice*, unsigned char*) ()

#3  0x0000000001937ca0 in kudu::Status kudu::pb_util::(anonymous 
namespace)::ValidateAndReadData<kudu::RandomAccessFile>(kudu::RandomAccessFile*,
 unsigned long, unsigned long*, unsigned long, kudu::Slice*, std::unique_ptr<unsigned char [], 
std::default_delete<unsigned char []> >*) [clone .constprop.142] ()

#4  0x000000000193a657 in kudu::Status kudu::pb_util::(anonymous 
namespace)::ReadPBStartingAt<kudu::RandomAccessFile>(kudu::RandomAccessFile*, 
int, unsigned long*, google::protobuf::Message*) ()

#5  0x000000000193b729 in 
kudu::pb_util::ReadablePBContainerFile::ReadNextPB(google::protobuf::Message*) 
()

#6  0x000000000189854c in 
kudu::fs::internal::LogBlockContainer::ReadContainerRecords(std::deque<kudu::BlockRecordPB,
 std::allocator<kudu::BlockRecordPB> >*) const ()

#7  0x000000000189b0ed in 
kudu::fs::LogBlockManager::OpenDataDir(kudu::fs::DataDir*, kudu::Status*) ()

#8  0x0000000001956edd in kudu::FunctionRunnable::Run() ()

#9  0x0000000001954cd3 in kudu::ThreadPool::DispatchThread(bool) ()

#10 0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()

#11 0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0

#12 0x00007fa671f3a34d in clone () from /lib64/libc.so.6

Thread 1 (Thread 0x7fa67426e940 (LWP 2430)):

#0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0

#1  0x0000000001953be8 in kudu::ThreadPool::Wait() ()

#2  0x000000000189d380 in kudu::fs::LogBlockManager::Open() ()

#3  0x000000000188f99e in kudu::FsManager::Open() ()

#4  0x00000000008c0011 in kudu::server::ServerBase::Init() ()

#5  0x00000000007d5d2b in kudu::tserver::TabletServer::Init() ()

#6  0x00000000007d450a in kudu::tserver::TabletServerMain(int, char**) ()

#7  0x00007fa671e63c05 in __libc_start_main () from /lib64/libc.so.6

#8  0x00000000007d4181 in _start ()

 

  was:
i have a kudu cluster

tserver config like this

 

--fs_wal_dir=/data/kuduTabletDir

--fs_data_dirs=/data/kuduTabletDir

--log_dir=/data/kuduTabletLogDir

 

in the tserver 

cd /data/kuduTabletDir/data

ll | wc -l

1962

cd /data/kuduTabletDir/tablet-meta

ll | wc -l

233

 

du -sh /data/kuduTabletDir/data

16G /data/kuduTabletDir/data

 

when i stop and start

 

I0106 22:59:15.685855  2430 fs_manager.cc:256] Time spent opening block 
manager: real 449.267s user 0.000s sys 0.000s

 

It took almost 8 minutes to finish opening the block manager, even though there is only 16G of data and the disk is an SSD.

 

In a larger cluster, it can take hours to start.

 


> log block manager very slow
> ---------------------------
>
>                 Key: KUDU-2654
>                 URL: https://issues.apache.org/jira/browse/KUDU-2654
>             Project: Kudu
>          Issue Type: Improvement
>    Affects Versions: 1.3.0, 1.4.0, 1.5.0
>            Reporter: Deng ZiWu
>            Priority: Critical
>
> I have a Kudu cluster.
> The tserver config looks like this:
>  
> --fs_wal_dir=/data/kuduTabletDir
> --fs_data_dirs=/data/kuduTabletDir
> --log_dir=/data/kuduTabletLogDir
>  
> On the tserver:
> cd /data/kuduTabletDir/data
> ll | wc -l
> 1962
> cd /data/kuduTabletDir/tablet-meta
> ll | wc -l
> 233
>  
> du -sh /data/kuduTabletDir/data
> 16G /data/kuduTabletDir/data
>  
> When I stop and start the tserver:
>  
> I0106 22:59:15.685855  2430 fs_manager.cc:256] Time spent opening block 
> manager: real 449.267s user 0.000s sys 0.000s
>  
> It took almost 8 minutes to finish opening the block manager, even though
> there is only 16G of data and the disk is an SSD.
>  
> In a larger cluster, it can take hours to start.
>  
> The pstack result looks like this:
> #0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
> /lib64/libpthread.so.0
> #1  0x00000000019638f1 in kudu::AsyncLogger::RunThread() ()
> #2  0x00007fa6727d22b0 in ?? () from /lib64/libstdc++.so.6
> #3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 8 (Thread 0x7fa6700cd700 (LWP 2434)):
> #0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
> /lib64/libpthread.so.0
> #1  0x00000000019638f1 in kudu::AsyncLogger::RunThread() ()
> #2  0x00007fa6727d22b0 in ?? () from /lib64/libstdc++.so.6
> #3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 7 (Thread 0x7fa66f8cc700 (LWP 2435)):
> #0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
> /lib64/libpthread.so.0
> #1  0x00000000019638f1 in kudu::AsyncLogger::RunThread() ()
> #2  0x00007fa6727d22b0 in ?? () from /lib64/libstdc++.so.6
> #3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 6 (Thread 0x7fa66f0cb700 (LWP 2436)):
> #0  0x00007fa673052cf2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from 
> /lib64/libpthread.so.0
> #1  0x00000000018cc71e in kudu::ConditionVariable::TimedWait(kudu::MonoDelta 
> const&) const ()
> #2  0x00000000019067db in kudu::KernelStackWatchdog::RunThread() ()
> #3  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()
> #4  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #5  0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 5 (Thread 0x7fa66e8ca700 (LWP 2437)):
> #0  0x00007fa673056371 in sigwait () from /lib64/libpthread.so.0
> #1  0x0000000001923ae3 in 
> kudu::MinidumpExceptionHandler::RunUserSignalHandlerThread() ()
> #2  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()
> #3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 4 (Thread 0x7fa66e0c9700 (LWP 2438)):
> #0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
> /lib64/libpthread.so.0
> #1  0x00000000019552e5 in kudu::ThreadPool::DispatchThread(bool) ()
> #2  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()
> #3  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #4  0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 3 (Thread 0x7fa66d6b5700 (LWP 2439)):
> #0  0x00007fa673052cf2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from 
> /lib64/libpthread.so.0
> #1  0x00000000018cc71e in kudu::ConditionVariable::TimedWait(kudu::MonoDelta 
> const&) const ()
> #2  0x00000000018f7ffc in 
> kudu::FileCache<kudu::RWFile>::RunDescriptorExpiry() ()
> #3  0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()
> #4  0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #5  0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 2 (Thread 0x7fa66ceb4700 (LWP 2440)):
> #0  0x00007fa673055f73 in pread64 () from /lib64/libpthread.so.0
> #1  0x00000000018e02fe in kudu::(anonymous 
> namespace)::PosixRandomAccessFile::Read(unsigned long, unsigned long, 
> kudu::Slice*, unsigned char*) const ()
> #2  0x00000000018ebedf in kudu::env_util::ReadFully(kudu::RandomAccessFile*, 
> unsigned long, unsigned long, kudu::Slice*, unsigned char*) ()
> #3  0x0000000001937ca0 in kudu::Status kudu::pb_util::(anonymous 
> namespace)::ValidateAndReadData<kudu::RandomAccessFile>(kudu::RandomAccessFile*,
>  unsigned long, unsigned long*, unsigned long, kudu::Slice*, std::unique_ptr<unsigned char [], 
> std::default_delete<unsigned char []> >*) [clone .constprop.142] ()
> #4  0x000000000193a657 in kudu::Status kudu::pb_util::(anonymous 
> namespace)::ReadPBStartingAt<kudu::RandomAccessFile>(kudu::RandomAccessFile*, 
> int, unsigned long*, google::protobuf::Message*) ()
> #5  0x000000000193b729 in 
> kudu::pb_util::ReadablePBContainerFile::ReadNextPB(google::protobuf::Message*)
>  ()
> #6  0x000000000189854c in 
> kudu::fs::internal::LogBlockContainer::ReadContainerRecords(std::deque<kudu::BlockRecordPB,
>  std::allocator<kudu::BlockRecordPB> >*) const ()
> #7  0x000000000189b0ed in 
> kudu::fs::LogBlockManager::OpenDataDir(kudu::fs::DataDir*, kudu::Status*) ()
> #8  0x0000000001956edd in kudu::FunctionRunnable::Run() ()
> #9  0x0000000001954cd3 in kudu::ThreadPool::DispatchThread(bool) ()
> #10 0x000000000194fbba in kudu::Thread::SuperviseThread(void*) ()
> #11 0x00007fa67304ee25 in start_thread () from /lib64/libpthread.so.0
> #12 0x00007fa671f3a34d in clone () from /lib64/libc.so.6
> Thread 1 (Thread 0x7fa67426e940 (LWP 2430)):
> #0  0x00007fa673052945 in pthread_cond_wait@@GLIBC_2.3.2 () from 
> /lib64/libpthread.so.0
> #1  0x0000000001953be8 in kudu::ThreadPool::Wait() ()
> #2  0x000000000189d380 in kudu::fs::LogBlockManager::Open() ()
> #3  0x000000000188f99e in kudu::FsManager::Open() ()
> #4  0x00000000008c0011 in kudu::server::ServerBase::Init() ()
> #5  0x00000000007d5d2b in kudu::tserver::TabletServer::Init() ()
> #6  0x00000000007d450a in kudu::tserver::TabletServerMain(int, char**) ()
> #7  0x00007fa671e63c05 in __libc_start_main () from /lib64/libc.so.6
> #8  0x00000000007d4181 in _start ()
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to