[
https://issues.apache.org/jira/browse/KUDU-3737?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Alexey Serbin updated KUDU-3737:
--------------------------------
Description:
One of the UNDO delta list's invariants didn't hold:
{noformat}
F20260206 08:07:50.635933 4636 compaction.cc:1529] Check failed: is_deleted
{noformat}
That corresponds to the following sanity check for the UNDO list in compaction.cc:
{code}
// Sanity check for UNDO list.
static void UndoListSanityCheck(Mutation* new_undos_head) {
auto* u = new_undos_head;
bool is_deleted = false;
// The resulting list should have the following invariants:
// - deletes can only be observed if not already deleted
// - reinserts can only be observed if deleted
// - UNDO mutations are in decreasing order
while (u != nullptr) {
if (u->changelist().is_delete()) {
CHECK(!is_deleted);
is_deleted = true;
} else if (u->changelist().is_reinsert()) {
CHECK(is_deleted); // <-- this was line 1529 in the corresponding
// source code (the triggered CHECK())
is_deleted = false;
}
if (!u->next()) break;
CHECK_GE(u->timestamp(), u->next()->timestamp());
u = u->next();
}
}
{code}
The stack trace looked like this:
{noformat}
*** Check failure stack trace: ***
*** Aborted at 1770365270 (unix time) try "date -d @1770365270" if you are
using GNU date ***
PC: @ 0x0 (unknown)
*** SIGABRT (@0x3e8000011c6) received by PID 4550 (TID 0x7f1907551700) from PID
4550; stack trace: ***
@ 0x7f191159c980 (unknown) at ??:0
@ 0x7f190e64efb7 gsignal at ??:0
@ 0x7f190e650921 abort at ??:0
@ 0x7f190fa5ddbd google::LogMessage::Fail() at ??:0
@ 0x7f190fa61b83 google::LogMessage::SendToLog() at ??:0
@ 0x7f190fa5d7bc google::LogMessage::Flush() at ??:0
@ 0x7f190fa5ef49 google::LogMessageFatal::~LogMessageFatal() at ??:0
@ 0x7f191454657b kudu::tablet::UndoListSanityCheck() at ??:0
@ 0x7f1914546f69 kudu::tablet::ApplyMutationsAndMergeDuplicateHistory()
at ??:0
@ 0x7f19145474d8 kudu::tablet::FlushCompactionInput() at ??:0
@ 0x7f1914458887 kudu::tablet::Tablet::DoMergeCompactionOrFlush() at
??:0
@ 0x7f191445b8fb kudu::tablet::Tablet::Compact() at ??:0
@ 0x5578e48d45a1
kudu::tablet::MultiThreadedTabletTest<>::CompactThread() at ??:0
@ 0x5578e48bfcc9
_ZZN4kudu6tablet46MultiThreadedTabletTest_DeleteAndReinsert_TestINS0_22NullableValueTestSetupEE8TestBodyEvENKUlmmE1_clEmm
at ??:0
@ 0x5578e48f496e
_ZNSt17_Function_handlerIFvmmEZN4kudu6tablet46MultiThreadedTabletTest_DeleteAndReinsert_TestINS2_22NullableValueTestSetupEE8TestBodyEvEUlmmE1_E9_M_invokeERKSt9_Any_dataOmSB_
at ??:0
@ 0x5578e48eee9d std::function<>::operator()() at ??:0
@ 0x5578e48d2413
_ZZN4kudu6tablet23MultiThreadedTabletTestINS0_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEENKUlvE_clEv
at ??:0
@ 0x5578e492997d
_ZSt13__invoke_implIvZN4kudu6tablet23MultiThreadedTabletTestINS1_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_JEET_St14__invoke_otherOT0_DpOT1_
at ??:0
@ 0x5578e4928a04
_ZSt8__invokeIZN4kudu6tablet23MultiThreadedTabletTestINS1_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_JEENSt15__invoke_resultIT_JDpT0_EE4typeEOSC_DpOSD_
at ??:0
@ 0x5578e492bd7e
_ZNSt6thread8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS3_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEE9_M_invokeIJLm0EEEEDTcl8__invokespcl10_S_declvalIXT_EEEEESt12_Index_tupleIJXspT_EEE
at ??:0
@ 0x5578e492bc4f
_ZNSt6thread8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS3_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEEclEv
at ??:0
@ 0x5578e492bb5a
_ZNSt6thread11_State_implINS_8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS4_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEEEE6_M_runEv
at ??:0
@ 0x7f190f0746df (unknown) at ??:0
@ 0x7f19115916db start_thread at ??:0
@ 0x7f190e73171f clone at ??:0
{noformat}
was:
{noformat}
*** Check failure stack trace: ***
*** Aborted at 1770365270 (unix time) try "date -d @1770365270" if you are
using GNU date ***
PC: @ 0x0 (unknown)
*** SIGABRT (@0x3e8000011c6) received by PID 4550 (TID 0x7f1907551700) from PID
4550; stack trace: ***
@ 0x7f191159c980 (unknown) at ??:0
@ 0x7f190e64efb7 gsignal at ??:0
@ 0x7f190e650921 abort at ??:0
@ 0x7f190fa5ddbd google::LogMessage::Fail() at ??:0
@ 0x7f190fa61b83 google::LogMessage::SendToLog() at ??:0
@ 0x7f190fa5d7bc google::LogMessage::Flush() at ??:0
@ 0x7f190fa5ef49 google::LogMessageFatal::~LogMessageFatal() at ??:0
@ 0x7f191454657b kudu::tablet::UndoListSanityCheck() at ??:0
@ 0x7f1914546f69 kudu::tablet::ApplyMutationsAndMergeDuplicateHistory()
at ??:0
@ 0x7f19145474d8 kudu::tablet::FlushCompactionInput() at ??:0
@ 0x7f1914458887 kudu::tablet::Tablet::DoMergeCompactionOrFlush() at
??:0
@ 0x7f191445b8fb kudu::tablet::Tablet::Compact() at ??:0
@ 0x5578e48d45a1
kudu::tablet::MultiThreadedTabletTest<>::CompactThread() at ??:0
@ 0x5578e48bfcc9
_ZZN4kudu6tablet46MultiThreadedTabletTest_DeleteAndReinsert_TestINS0_22NullableValueTestSetupEE8TestBodyEvENKUlmmE1_clEmm
at ??:0
@ 0x5578e48f496e
_ZNSt17_Function_handlerIFvmmEZN4kudu6tablet46MultiThreadedTabletTest_DeleteAndReinsert_TestINS2_22NullableValueTestSetupEE8TestBodyEvEUlmmE1_E9_M_invokeERKSt9_Any_dataOmSB_
at ??:0
@ 0x5578e48eee9d std::function<>::operator()() at ??:0
@ 0x5578e48d2413
_ZZN4kudu6tablet23MultiThreadedTabletTestINS0_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEENKUlvE_clEv
at ??:0
@ 0x5578e492997d
_ZSt13__invoke_implIvZN4kudu6tablet23MultiThreadedTabletTestINS1_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_JEET_St14__invoke_otherOT0_DpOT1_
at ??:0
@ 0x5578e4928a04
_ZSt8__invokeIZN4kudu6tablet23MultiThreadedTabletTestINS1_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_JEENSt15__invoke_resultIT_JDpT0_EE4typeEOSC_DpOSD_
at ??:0
@ 0x5578e492bd7e
_ZNSt6thread8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS3_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEE9_M_invokeIJLm0EEEEDTcl8__invokespcl10_S_declvalIXT_EEEEESt12_Index_tupleIJXspT_EEE
at ??:0
@ 0x5578e492bc4f
_ZNSt6thread8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS3_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEEclEv
at ??:0
@ 0x5578e492bb5a
_ZNSt6thread11_State_implINS_8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS4_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEEEE6_M_runEv
at ??:0
@ 0x7f190f0746df (unknown) at ??:0
@ 0x7f19115916db start_thread at ??:0
@ 0x7f190e73171f clone at ??:0
{noformat}
> MultiThreadedTabletTest/5.DeleteAndReinsert is flaky (TypeParam
> kudu::tablet::NullableValueTestSetup)
> -----------------------------------------------------------------------------------------------------
>
> Key: KUDU-3737
> URL: https://issues.apache.org/jira/browse/KUDU-3737
> Project: Kudu
> Issue Type: Bug
> Components: test, tserver
> Affects Versions: 1.19.0
> Reporter: Alexey Serbin
> Priority: Major
> Attachments: mt-tablet-test.3.20260206.txt.xz
>
>
> One of the UNDO delta list's invariants didn't hold:
> {noformat}
> F20260206 08:07:50.635933 4636 compaction.cc:1529] Check failed: is_deleted
> {noformat}
> That corresponds to the following sanity check for the UNDO list in compaction.cc:
> {code}
> // Sanity check for UNDO list.
> static void UndoListSanityCheck(Mutation* new_undos_head) {
> auto* u = new_undos_head;
> bool is_deleted = false;
> // The resulting list should have the following invariants:
> // - deletes can only be observed if not already deleted
> // - reinserts can only be observed if deleted
> // - UNDO mutations are in decreasing order
> while (u != nullptr) {
> if (u->changelist().is_delete()) {
> CHECK(!is_deleted);
> is_deleted = true;
> } else if (u->changelist().is_reinsert()) {
> CHECK(is_deleted); // <-- this was line 1529 in the corresponding
> // source code (the triggered CHECK())
> is_deleted = false;
> }
> if (!u->next()) break;
> CHECK_GE(u->timestamp(), u->next()->timestamp());
> u = u->next();
> }
> }
> {code}
> The stack trace looked like this:
> {noformat}
> *** Check failure stack trace: ***
> *** Aborted at 1770365270 (unix time) try "date -d @1770365270" if you are
> using GNU date ***
> PC: @ 0x0 (unknown)
> *** SIGABRT (@0x3e8000011c6) received by PID 4550 (TID 0x7f1907551700) from
> PID 4550; stack trace: ***
> @ 0x7f191159c980 (unknown) at ??:0
> @ 0x7f190e64efb7 gsignal at ??:0
> @ 0x7f190e650921 abort at ??:0
> @ 0x7f190fa5ddbd google::LogMessage::Fail() at ??:0
> @ 0x7f190fa61b83 google::LogMessage::SendToLog() at ??:0
> @ 0x7f190fa5d7bc google::LogMessage::Flush() at ??:0
> @ 0x7f190fa5ef49 google::LogMessageFatal::~LogMessageFatal() at ??:0
> @ 0x7f191454657b kudu::tablet::UndoListSanityCheck() at ??:0
> @ 0x7f1914546f69
> kudu::tablet::ApplyMutationsAndMergeDuplicateHistory() at ??:0
> @ 0x7f19145474d8 kudu::tablet::FlushCompactionInput() at ??:0
> @ 0x7f1914458887 kudu::tablet::Tablet::DoMergeCompactionOrFlush() at
> ??:0
> @ 0x7f191445b8fb kudu::tablet::Tablet::Compact() at ??:0
> @ 0x5578e48d45a1
> kudu::tablet::MultiThreadedTabletTest<>::CompactThread() at ??:0
> @ 0x5578e48bfcc9
> _ZZN4kudu6tablet46MultiThreadedTabletTest_DeleteAndReinsert_TestINS0_22NullableValueTestSetupEE8TestBodyEvENKUlmmE1_clEmm
> at ??:0
> @ 0x5578e48f496e
> _ZNSt17_Function_handlerIFvmmEZN4kudu6tablet46MultiThreadedTabletTest_DeleteAndReinsert_TestINS2_22NullableValueTestSetupEE8TestBodyEvEUlmmE1_E9_M_invokeERKSt9_Any_dataOmSB_
> at ??:0
> @ 0x5578e48eee9d std::function<>::operator()() at ??:0
> @ 0x5578e48d2413
> _ZZN4kudu6tablet23MultiThreadedTabletTestINS0_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEENKUlvE_clEv
> at ??:0
> @ 0x5578e492997d
> _ZSt13__invoke_implIvZN4kudu6tablet23MultiThreadedTabletTestINS1_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_JEET_St14__invoke_otherOT0_DpOT1_
> at ??:0
> @ 0x5578e4928a04
> _ZSt8__invokeIZN4kudu6tablet23MultiThreadedTabletTestINS1_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_JEENSt15__invoke_resultIT_JDpT0_EE4typeEOSC_DpOSD_
> at ??:0
> @ 0x5578e492bd7e
> _ZNSt6thread8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS3_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEE9_M_invokeIJLm0EEEEDTcl8__invokespcl10_S_declvalIXT_EEEEESt12_Index_tupleIJXspT_EEE
> at ??:0
> @ 0x5578e492bc4f
> _ZNSt6thread8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS3_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEEclEv
> at ??:0
> @ 0x5578e492bb5a
> _ZNSt6thread11_State_implINS_8_InvokerISt5tupleIJZN4kudu6tablet23MultiThreadedTabletTestINS4_22NullableValueTestSetupEE12StartThreadsEmRKSt8functionIFvmmEEEUlvE_EEEEE6_M_runEv
> at ??:0
> @ 0x7f190f0746df (unknown) at ??:0
> @ 0x7f19115916db start_thread at ??:0
> @ 0x7f190e73171f clone at ??:0
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)