[ https://issues.apache.org/jira/browse/CASSANDRA-20567?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
David Capwell updated CASSANDRA-20567: -------------------------------------- Fix Version/s: 5.0.5 5.1 (was: 5.x) (was: 5.0.x) Since Version: 5.0 Source Control Link: https://github.com/apache/cassandra/commit/07831c9cc7efcb9be7b227260467cf10a7be7724 Resolution: Fixed Status: Resolved (was: Ready to Commit) > SAI marks an index as non-empty when a partial partition/row modifications is > flushed due to repair > --------------------------------------------------------------------------------------------------- > > Key: CASSANDRA-20567 > URL: https://issues.apache.org/jira/browse/CASSANDRA-20567 > Project: Apache Cassandra > Issue Type: Bug > Components: Consistency/Streaming, Feature/SAI > Reporter: David Capwell > Assignee: David Capwell > Priority: Normal > Fix For: 5.0.5, 5.1 > > Attachments: > ci_summary-cassandra-5.0-b90fb7e7a0e49115d04c61691f2bd54daba29a48.html, > ci_summary-trunk-fe43a45db9838c77cc7703a17a33e8bb703a1fe2.html, > result_details-cassandra-5.0-b90fb7e7a0e49115d04c61691f2bd54daba29a48.tar.gz, > result_details-trunk-fe43a45db9838c77cc7703a17a33e8bb703a1fe2.tar.gz > > Time Spent: 1h 20m > Remaining Estimate: 0h > > This was found while testing cep-15-accord, so I ported the test to trunk > and found the behavior is there as well. Below is the patch to reproduce it. > SHA: 6b2cdba56b85b948a8716a02b2cd3015e8d1ff9a > {code} > diff --git > a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java > > b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java > index d6c0183473..db754ab276 100644 > --- > a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java > +++ > b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkBase.java > @@ -70,7 +70,9 @@ public abstract class MultiNodeTableWalkBase extends > SingleNodeTableWalkTest > .set("read_request_timeout", "180s") > .set("write_request_timeout", "180s") > .set("native_transport_timeout", 
"180s") > - .set("slow_query_log_timeout", "180s"); > + .set("slow_query_log_timeout", "180s") > + .set("repair.retries.max_attempts", Integer.MAX_VALUE) > + ; > } > @Override > @@ -100,6 +102,12 @@ public abstract class MultiNodeTableWalkBase extends > SingleNodeTableWalkTest > return true; > } > + @Override > + public boolean allowRepair() > + { > + return true; > + } > + > @Override > protected IInvokableInstance selectInstance(RandomSource rs) > { > diff --git > a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java > > b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java > index 5a0ce66ccc..be4fd7e403 100644 > --- > a/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java > +++ > b/test/distributed/org/apache/cassandra/distributed/test/cql3/MultiNodeTableWalkWithoutReadRepairTest.java > @@ -38,5 +38,7 @@ public class MultiNodeTableWalkWithoutReadRepairTest > extends MultiNodeTableWalkB > // CQL_DEBUG_APPLY_OPERATOR = true; > // When mutations look to be lost as seen by more complex SELECTs, it > can be useful to just SELECT the partition/row right after to write to see if > it was safe at the time. 
> // READ_AFTER_WRITE = true; > + > + builder.withSeed(7960925307057789470L).withExamples(1); > } > } > diff --git > a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java > > b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java > index 924bd3eeeb..3d9dadd493 100644 > --- > a/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java > +++ > b/test/distributed/org/apache/cassandra/distributed/test/cql3/SingleNodeTableWalkTest.java > @@ -126,6 +126,13 @@ public class SingleNodeTableWalkTest extends > StatefulASTBase > return Collections.singletonList(CreateIndexDDL.SAI); > } > + public Property.Command<State, Void, ?> repair(RandomSource rs, State > state) > + { > + // see org.apache.cassandra.fuzz.sai.MultiNodeSAITestBase.repair > + // cluster.get(1).nodetool("repair", schema.keyspace); > + return new Property.SimpleCommand<>("repair", s2 -> > s2.cluster.get(1).nodetoolResult("repair", s2.metadata.keyspace, > s2.metadata.name).asserts().success()); > + } > + > public Property.Command<State, Void, ?> selectExisting(RandomSource rs, > State state) > { > NavigableSet<BytesPartitionState.Ref> keys = > state.model.partitionKeys(); > @@ -362,6 +369,7 @@ public class SingleNodeTableWalkTest extends > StatefulASTBase > statefulBuilder.check(commands(() -> rs -> createState(rs, > cluster)) > .add(StatefulASTBase::insert) > .add(StatefulASTBase::fullTableScan) > + .addIf(State::allowRepair, this::repair) > .addIf(State::hasPartitions, > this::selectExisting) > .addAllIf(State::supportTokens, b -> > b.add(this::selectToken) > > .add(this::selectTokenRange)) > @@ -593,6 +601,11 @@ public class SingleNodeTableWalkTest extends > StatefulASTBase > return !(model.isEmpty() || > searchableNonPartitionColumns.isEmpty()); > } > + public boolean allowRepair() > + { > + return false; > + } > + > @Override > public String toString() > { > {code} > With that patch, run > 
org.apache.cassandra.distributed.test.cql3.MultiNodeTableWalkWithoutReadRepairTest > and it should fail at step 79 > {code} > 79: repair > {code} > Root cause looks to be > {code} > WARN 18:39:14 [Stream #13815194-1dd2-11b2-bd2b-0119500a7b6e] Stream failed: > Session peer /127.0.0.1:7012 Failed because of an unknown exception > java.io.UncheckedIOException: java.nio.file.NoSuchFileException: > /private/var/folders/h1/s_3p1x3s3hl0hltbpck67m0h0000gn/T/dtests11917950086796980781/node3/data2/ks1/tbl-1b255f4def2540a6000000000000000d/oa-10-big-SAI+aa+tbl_pk0+Meta.db > > org.apache.cassandra.index.sai.disk.v1.V1OnDiskFormat.rethrowIOException(V1OnDiskFormat.java:259) > > org.apache.cassandra.index.sai.disk.v1.V1OnDiskFormat.validateIndexComponent(V1OnDiskFormat.java:250) > java.nio.file.NoSuchFileException: > /private/var/folders/h1/s_3p1x3s3hl0hltbpck67m0h0000gn/T/dtests11917950086796980781/node3/data2/ks1/tbl-1b255f4def2540a6000000000000000d/oa-10-big-SAI+aa+tbl_pk0+Meta.db > > java.base/sun.nio.fs.UnixException.translateToIOException(UnixException.java:92) > > java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org