Joe McDonnell created KUDU-3461:
-----------------------------------

             Summary: Kudu client can blow the stack with infinite recursions 
between PickLeader() and LookupTabletByKey()
                 Key: KUDU-3461
                 URL: https://issues.apache.org/jira/browse/KUDU-3461
             Project: Kudu
          Issue Type: Bug
          Components: client
    Affects Versions: 1.17.0
            Reporter: Joe McDonnell


In an Impala cluster, we ran into a scenario that causes Impala to crash with a 
SIGSEGV. When reproducing the crash under gdb, we see the stack exhausted by 
unbounded recursion between PickLeader() and LookupTabletByKey():
{noformat}
#0  0x00007f983e031a1c in clock_gettime ()
#1  0x00007f983bfda0b5 in __GI___clock_gettime (clock_id=clock_id@entry=1, 
tp=0x7f967bd8b070) at ../sysdeps/unix/sysv/linux/clock_gettime.c:38
#2  0x00007f983c9f8e48 in kudu::Stopwatch::GetTimes (times=0x7f967bd8b1b0, 
this=<optimized out>, this=<optimized out>) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:294
#3  0x00007f983ca09829 in kudu::Stopwatch::stop (this=0x7f967bd8b320) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:218
#4  kudu::Stopwatch::stop (this=0x7f967bd8b320) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:213
#5  kudu::sw_internal::LogTiming::Print (max_expected_millis=50, 
this=0x7f967bd8b320) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:359
#6  kudu::sw_internal::LogTiming::~LogTiming (this=0x7f967bd8b320, 
__in_chrg=<optimized out>) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:329
#7  0x00007f983c9fe32c in 
kudu::client::internal::MetaCache::LookupEntryByKeyFastPath (this=<optimized 
out>, table=<optimized out>, partition_key=..., entry=0x7f967bd8b4c0) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/locks.h:99
#8  0x00007f983c9fe656 in kudu::client::internal::MetaCache::DoFastPathLookup 
(this=0xde431e0, table=0xf899300, partition_key=0x7f967bd8b700, 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1243
#9  0x00007f983ca05731 in 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1405
#10 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#11 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967bd8b8c0) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#12 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#13 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#14 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967bd8bad0) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#15 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#16 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#17 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967bd8bce0) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#18 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#19 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#20 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967bd8bef0) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#21 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#22 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#23 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967bd8c100) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#24 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#25 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#26 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967bd8c310) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#27 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408

... continues ...

#47617 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#47618 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967c589290) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47619 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#47620 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
--Type <RET> for more, q to quit, c to continue without paging--
#47621 0x00007f983ca0575f in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., 
this=0x7f967c5894a0) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47622 
kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable 
const*, kudu::PartitionKey, kudu::MonoTime const&, 
kudu::client::internal::MetaCache::LookupType, 
scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void 
(kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, 
partition_key=..., deadline=..., 
lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, 
remote_tablet=0x0, callback=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408
#47623 0x00007f983ca0598c in 
kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void 
(kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, 
kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153
#47624 0x00007f983ca066a7 in std::function<void (kudu::Status 
const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0xca50918) 
at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47625 
kudu::client::internal::LookupRpc::SendRpcCb (this=0xca50800, status=...) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:966
#47626 0x00007f983c9db65c in 
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
 
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}::operator()()
 const (this=<optimized out>, this=<optimized out>)
    at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/status.h:230
#47627 
std::__invoke_impl<void, 
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
 
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(std::__invoke_other,
 
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
 kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&) 
(__f=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:60
#47628 std::__invoke_r<void, 
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
 kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(void&&, 
(kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
 kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&)...) 
(__fn=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:110
#47629 std::_Function_handler<void (), 
kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB,
 
kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}>::_M_invoke(std::_Any_data
 const&) (__functor=...)
    at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:291
#47630 0x00007f983cac860b in std::function<void ()>::operator()() const 
(this=0xee3f9c0) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47631 kudu::rpc::OutboundCall::CallCallback (this=0xee3f840) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/outbound_call.cc:309
#47632 0x00007f983cabb763 in kudu::rpc::Connection::HandleCallResponse 
(this=0xcd00700, transfer=...) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172
#47633 0x00007f983cabc215 in kudu::rpc::Connection::ReadHandler 
(this=0xcd00700, watcher=..., revents=<optimized out>) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172
#47634 
0x00007f983cdb3ffb in ev_invoke_pending (loop=0xcc99b00) at 
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3155
#47635 0x00007f983ca97cc8 in kudu::rpc::ReactorThread::InvokePendingCb 
(loop=0xcc99b00) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:202
#47636 0x00007f983cdb73f7 in ev_run (flags=0, loop=0xcc99b00) at 
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3555
#47637 ev_run (loop=0xcc99b00, flags=0) at 
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3402
#47638 0x00007f983ca98bd9 in ev::loop_ref::run (flags=0, this=0xef75be0) at 
/mnt/source/kudu/kudu-345fd44ca3/thirdparty/installed/uninstrumented/include/ev++.h:211
#47639 kudu::rpc::ReactorThread::RunThread (this=0xef75bd8) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:503
#47640 0x00007f983cc2d36c in std::function<void ()>::operator()() const 
(this=0xec68358) at 
/mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617
#47641 kudu::Thread::SuperviseThread (arg=0xec68300) at 
/mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/thread.cc:691
#47642 0x00007f983dfec609 in start_thread (arg=<optimized out>) at 
pthread_create.c:477
#47643 0x00007f983c01c133 in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
{noformat}
It hits a SIGSEGV because the stack gets blown out.

Here are the steps to reproduce it from Impala:
{noformat}
/** 1. Create table **/
drop table if exists impala_crash;
create table if not exists impala_crash (
dt string,
col string,
primary key(dt)
)
partition by range(dt) (
partition values <= '00000000'
)
stored as kudu;
/** 2. alter and insert **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc');
/* normal */
/** 3. Run the same queries again and impala daemon crashes **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc');
{noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to