This is an automated email from the ASF dual-hosted git repository.

maedhroz pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git

commit ab1ce5938d89a08ebdfbd8ccca48498066b7cbda
Merge: f91655df06 7a8335c273
Author: Caleb Rackliffe <calebrackli...@gmail.com>
AuthorDate: Thu May 8 14:42:19 2025 -0500

    Merge branch 'cassandra-5.0' into trunk
    
    * cassandra-5.0:
      Optimize initial skipping logic for SAI queries on large partitions

 CHANGES.txt                                        |   1 +
 .../sai/plan/StorageAttachedIndexSearcher.java     |  58 +++-
 .../index/sai/cql/IntraPartitionSkippingTest.java  | 318 +++++++++++++++++++++
 3 files changed, 375 insertions(+), 2 deletions(-)

diff --cc CHANGES.txt
index 77d5b4eb71,c073719105..dc088c7457
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,195 -1,6 +1,196 @@@
 -5.0.5
 +5.1
 + * When a custom disk error handler fails to initiate, fail the startup of a 
node instead of using the no-op handler (CASSANDRA-20614)
 + * Rewrite constraint framework to remove column specification from 
constraint definition, introduce SQL-like NOT NULL (CASSANDRA-20563)
 + * Fix a bug in AutoRepair duration metric calculation if schedule finishes 
quickly (CASSANDRA-20622)
 + * Fix AutoRepair flaky InJvm dtest (CASSANDRA-20620)
 + * Increasing default for auto_repair.sstable_upper_threshold considering 
large Cassandra tables & revert three lines removed from CHANGES.txt due to a 
merge mistake (CASSANDRA-20586)
 + * Fix token restrictions with MIN_TOKEN (CASSANDRA-20557)
 + * Upgrade logback version to 1.5.18 and slf4j dependencies to 2.0.17 
(CASSANDRA-20429)
 + * Switch memtable-related off-heap objects to Native Endian and Memory to 
Little Endian (CASSANDRA-20190)
 + * Change SSTableSimpleScanner to use SSTableReader#openDataReaderForScan 
(CASSANDRA-20538)
 + * Automated Repair Inside Cassandra [CEP-37] (CASSANDRA-19918)
 + * Implement appender of slow queries to system_views.slow_queries table 
(CASSANDRA-13001)
 + * Add autocompletion in CQLSH for built-in functions (CASSANDRA-19631)
 + * Grant permission on keyspaces system_views and system_virtual_schema not 
possible (CASSANDRA-20171)
 + * General Purpose Transactions (Accord) [CEP-15] (CASSANDRA-17092)
 + * Improve performance when getting writePlacementsAllSettled from 
ClusterMetadata (CASSANDRA-20526)
 + * Add nodetool command to dump the contents of the 
system_views.{cluster_metadata_log, cluster_metadata_directory} tables 
(CASSANDRA-20525)
 + * Fix TreeMap race in CollectionVirtualTableAdapter causing us to lose rows 
in the virtual table (CASSANDRA-20524)
 + * Improve metadata log catch up with inter-DC mutation forwarding 
(CASSANDRA-20523)
 + * Support topology-safe changes to Datacenter & Rack for live nodes 
(CASSANDRA-20528)
 + * Add SSTableIntervalTree latency metric (CASSANDRA-20502)
 + * Ignore repetitions of semicolon in CQLSH (CASSANDRA-19956)
 + * Avoid NPE during cms initialization abort (CASSANDRA-20527)
 + * Avoid failing queries when epoch changes and replica goes up/down 
(CASSANDRA-20489)
 + * Split out truncation record lock (CASSANDRA-20480)
 + * Throw new IndexBuildInProgressException when queries fail during index 
build, instead of IndexNotAvailableException (CASSANDRA-20402)
 + * Fix Paxos repair interrupts running transactions (CASSANDRA-20469)
 + * Various fixes in constraint framework (CASSANDRA-20481)
 + * Add support in CAS for -= on numeric types, and fixed improper handling of 
empty bytes which lead to NPE (CASSANDRA-20477)
 + * Do not fail to start a node with materialized views after they are turned 
off in config (CASSANDRA-20452)
 + * Fix nodetool gcstats output, support human-readable units and more output 
formats (CASSANDRA-19022)
 + * Various gossip to TCM upgrade fixes (CASSANDRA-20483)
 + * Add nodetool command to abort failed nodetool cms initialize 
(CASSANDRA-20482)
 + * Repair Paxos for the distributed metadata log when CMS membership changes 
(CASSANDRA-20467)
 + * Reintroduce CASSANDRA-17411 in trunk (CASSANDRA-19346)
 + * Add min/max/mean/percentiles to timer metrics vtable (CASSANDRA-20466)
 + * Add support for time, date, timestamp types in scalar constraint 
(CASSANDRA-20274)
 + * Add regular expression constraint (CASSANDRA-20275)
 + * Improve constraints autocompletion (CASSANDRA-20341)
 + * Add JVM version and Cassandra build date to nodetool version -v 
(CASSANDRA-19721)
 + * Move all disk error logic to DiskErrorsHandler to enable pluggability 
(CASSANDRA-20363)
 + * Fix marking an SSTable as suspected and BufferPool leakage in case of a 
corrupted SSTable read during a compaction (CASSANDRA-20396)
 + * Add missed documentation for CREATE TABLE LIKE (CASSANDRA-20401)
 + * Add OCTET_LENGTH constraint (CASSANDRA-20340)
 + * Reduce memory allocations in miscellaneous places along the hot write path 
(CASSANDRA-20167)
 + * Provide keystore_password_file and truststore_password_file options to 
read credentials from a file (CASSANDRA-13428)
 + * Unregistering a node should also remove it from tokenMap if it is there 
and recalculate the placements (CASSANDRA-20346)
 + * Fix PartitionUpdate.isEmpty deserialization issue to avoid potential 
EOFException (CASSANDRA-20345)
 + * Avoid adding LEFT nodes to tokenMap on upgrade from gossip 
(CASSANDRA-20344)
 + * Allow empty placements when deserializing cluster metadata 
(CASSANDRA-20343)
 + * Reduce heap pressure when initializing CMS (CASSANDRA-20267)
 + * Paxos Repair: NoSuchElementException on 
DistributedSchema.getKeyspaceMetadata (CASSANDRA-20320)
 + * Improve performance of DistributedSchema.validate for large schemas 
(CASSANDRA-20360)
 + * Add JSON constraint (CASSANDRA-20273)
 + * Prevent invalid constraint combinations (CASSANDRA-20330)
 + * Support CREATE TABLE LIKE WITH INDEXES (CASSANDRA-19965)
 + * Invalidate relevant prepared statements on every change to TableMetadata 
(CASSANDRA-20318)
 + * Add per type max size guardrails (CASSANDRA-19677)
 + * Make it possible to abort all kinds of multi step operations 
(CASSANDRA-20217)
 + * Do not leak non-Java exceptions when calling snapshot operations via JMX 
(CASSANDRA-20335)
 + * Implement NOT_NULL constraint (CASSANDRA-20276)
 + * Improve error messages for constraints (CASSANDRA-20266)
 + * Add system_views.partition_key_statistics for querying SSTable metadata 
(CASSANDRA-20161)
 + * CEP-42 - Add Constraints Framework (CASSANDRA-19947)
 + * Add table metric PurgeableTombstoneScannedHistogram and a tracing event 
for scanned purgeable tombstones (CASSANDRA-20132)
 + * Make sure we can parse the expanded CQL before writing it to the log or 
sending it to replicas (CASSANDRA-20218)
 + * Add format_bytes and format_time functions (CASSANDRA-19546)
 + * Fix error when trying to assign a tuple to target type not being a tuple 
(CASSANDRA-20237)
 + * Fail CREATE TABLE LIKE statement if UDTs in target keyspace do not exist 
or they have different structure from ones in source keyspace (CASSANDRA-19966)
 + * Support octet_length and length functions (CASSANDRA-20102)
 + * Make JsonUtils serialize Instant always with the same format 
(CASSANDRA-20209)
 + * Port Harry v2 to trunk (CASSANDRA-20200)
 + * Enable filtering of snapshots on keyspace, table and snapshot name in 
nodetool listsnapshots (CASSANDRA-20151)
 + * Create manifest upon loading where it does not exist or enrich it 
(CASSANDRA-20150)
 + * Propagate true size of snapshot in SnapshotDetailsTabularData to not call 
JMX twice in nodetool listsnapshots (CASSANDRA-20149)
 + * Implementation of CEP-43 - copying a table via CQL by CREATE TABLE LIKE 
(CASSANDRA-19964)
 + * Periodically disconnect roles that are revoked or have LOGIN=FALSE set 
(CASSANDRA-19385)
 + * AST library for CQL-based fuzz tests (CASSANDRA-20198)
 + * Support audit logging for JMX operations (CASSANDRA-20128)
 + * Enable sorting of nodetool status output (CASSANDRA-20104)
 + * Support downgrading after CMS is initialized (CASSANDRA-20145)
 + * Deprecate IEndpointSnitch (CASSANDRA-19488)
 + * Check presence of a snapshot in a case-insensitive manner on macOS 
platform to prevent hardlinking failures (CASSANDRA-20146)
 + * Enable JMX server configuration to be in cassandra.yaml (CASSANDRA-11695)
 + * Parallelized UCS compactions (CASSANDRA-18802)
 + * Avoid prepared statement invalidation race when committing schema changes 
(CASSANDRA-20116)
 + * Restore optimization in MultiCBuilder around building one clustering 
(CASSANDRA-20129)
 + * Consolidate all snapshot management to SnapshotManager and introduce 
SnapshotManagerMBean (CASSANDRA-18111)
 + * Fix RequestFailureReason constants codes (CASSANDRA-20126)
 + * Introduce SSTableSimpleScanner for compaction (CASSANDRA-20092)
 + * Include column drop timestamp in alter table transformation 
(CASSANDRA-18961)
 + * Make JMX SSL configurable in cassandra.yaml (CASSANDRA-18508)
 + * Fix cqlsh CAPTURE command to save query results without trace details when 
TRACING is ON (CASSANDRA-19105)
 + * Optionally prevent tombstone purging during repair (CASSANDRA-20071)
 + * Add post-filtering support for the IN operator in SAI queries 
(CASSANDRA-20025)
 + * Don’t finish ongoing decommission and move operations during startup 
(CASSANDRA-20040)
 + * Nodetool reconfigure cms has correct return code when streaming fails 
(CASSANDRA-19972)
 + * Reintroduce RestrictionSet#iterator() optimization around multi-column 
restrictions (CASSANDRA-20034)
 + * Explicitly localize strings to Locale.US for internal implementation 
(CASSANDRA-19953)
 + * Add -H option for human-friendly output in nodetool compactionhistory 
(CASSANDRA-20015)
 + * Fix type check for referenced duration type for nested types 
(CASSANDRA-19890)
 + * In simulation tests, correctly set the tokens of replacement nodes 
(CASSANDRA-19997)
 + * During TCM upgrade, retain all properties of existing system tables 
(CASSANDRA-19992)
 + * Properly cancel in-flight futures and reject requests in 
EpochAwareDebounce during shutdown (CASSANDRA-19848)
 + * Provide clearer exception message on failing commitlog_disk_access_mode 
combinations (CASSANDRA-19812)
 + * Add total space used for a keyspace to nodetool tablestats 
(CASSANDRA-19671)
 + * Ensure Relation#toRestriction() handles ReversedType properly 
(CASSANDRA-19950)
 + * Add JSON and YAML output option to nodetool gcstats (CASSANDRA-19771)
 + * Introduce metadata serialization version V4 (CASSANDRA-19970)
 + * Allow CMS reconfiguration to work around DOWN nodes (CASSANDRA-19943)
 + * Make TableParams.Serializer set allowAutoSnapshots and incrementalBackups 
(CASSANDRA-19954)
 + * Make sstabledump possible to show tombstones only (CASSANDRA-19939)
 + * Ensure that RFP queries potentially stale replicas even with only key 
columns in the row filter (CASSANDRA-19938)
 + * Allow nodes to change IP address while upgrading to TCM (CASSANDRA-19921)
 + * Retain existing keyspace params on system tables after upgrade 
(CASSANDRA-19916)
 + * Deprecate use of gossip state for paxos electorate verification 
(CASSANDRA-19904)
 + * Update dtest-api to 0.0.17 to fix jvm17 crash in jvm-dtests 
(CASSANDRA-19239)
 + * Add resource leak test and Update Netty to 4.1.113.Final to fix leak 
(CASSANDRA-19783)
 + * Fix incorrect nodetool suggestion when gossip mode is running 
(CASSANDRA-19905)
 + * SAI support for BETWEEN operator (CASSANDRA-19688)
 + * Fix BETWEEN filtering for reversed clustering columns (CASSANDRA-19878)
 + * Retry if node leaves CMS while committing a transformation 
(CASSANDRA-19872)
 + * Add support for NOT operators in WHERE clauses. Fixed Three Valued Logic 
(CASSANDRA-18584)
 + * Allow getendpoints for system tables and make sure getNaturalReplicas work 
for MetaStrategy (CASSANDRA-19846)
 + * On upgrade, handle pre-existing tables with unexpected table ids 
(CASSANDRA-19845)
 + * Reconfigure CMS before assassinate (CASSANDRA-19768)
 + * Warn about unqualified prepared statement only if it is select or 
modification statement (CASSANDRA-18322)
 + * Update legacy peers tables during node replacement (CASSANDRA-19782)
 + * Refactor ColumnCondition (CASSANDRA-19620)
 + * Allow configuring log format for Audit Logs (CASSANDRA-19792)
 + * Support for noboolean rpm (centos7 compatible) packages removed 
(CASSANDRA-19787)
 + * Allow threads waiting for the metadata log follower to be interrupted 
(CASSANDRA-19761)
 + * Support dictionary lookup for CassandraPasswordValidator (CASSANDRA-19762)
 + * Disallow denylisting keys in system_cluster_metadata (CASSANDRA-19713)
 + * Fix gossip status after replacement (CASSANDRA-19712)
 + * Ignore repair requests for system_cluster_metadata (CASSANDRA-19711)
 + * Avoid ClassCastException when verifying tables with reversed partitioner 
(CASSANDRA-19710)
 + * Always repair the full range when repairing system_cluster_metadata 
(CASSANDRA-19709)
 + * Use table-specific partitioners during Paxos repair (CASSANDRA-19714)
 + * Expose current compaction throughput in nodetool (CASSANDRA-13890)
 + * CEP-24 Password validation / generation (CASSANDRA-17457)
 + * Reconfigure CMS after replacement, bootstrap and move operations 
(CASSANDRA-19705)
 + * Support querying LocalStrategy tables with any partitioner 
(CASSANDRA-19692)
 + * Relax slow_query_log_timeout for MultiNodeSAITest (CASSANDRA-19693)
 + * Audit Log entries are missing identity for mTLS connections 
(CASSANDRA-19669)
 + * Add support for the BETWEEN operator in WHERE clauses (CASSANDRA-19604)
 + * Replace Stream iteration with for-loop for 
SimpleRestriction::bindAndGetClusteringElements (CASSANDRA-19679)
 + * Consolidate logging on trace level (CASSANDRA-19632)
 + * Expand DDL statements on coordinator before submission to the CMS 
(CASSANDRA-19592)
 + * Fix number of arguments of String.format() in various classes 
(CASSANDRA-19645)
 + * Remove unused fields from config (CASSANDRA-19599)
 + * Refactor Relation and Restriction hierarchies (CASSANDRA-19341)
 + * Raise priority of TCM internode messages during critical operations 
(CASSANDRA-19517)
 + * Add nodetool command to unregister LEFT nodes (CASSANDRA-19581)
 + * Add cluster metadata id to gossip syn messages (CASSANDRA-19613)
 + * Reduce heap usage occupied by the metrics (CASSANDRA-19567)
 + * Improve handling of transient replicas during range movements 
(CASSANDRA-19344)
 + * Enable debounced internode log requests to be cancelled at shutdown 
(CASSANDRA-19514)
 + * Correctly set last modified epoch when combining multistep operations into 
a single step (CASSANDRA-19538)
 + * Add new TriggersPolicy configuration to allow operators to disable 
triggers (CASSANDRA-19532)
 + * Use Transformation.Kind.id in local and distributed log tables 
(CASSANDRA-19516)
 + * Remove period field from ClusterMetadata and metadata log tables 
(CASSANDRA-19482)
 + * Enrich system_views.pending_hints vtable with hints sizes (CASSANDRA-19486)
 + * Expose all dropwizard metrics in virtual tables (CASSANDRA-14572)
 + * Ensured that PropertyFileSnitchTest does not overwrite 
cassandra-topology.properties (CASSANDRA-19502)
 + * Add option for MutualTlsAuthenticator to restrict the certificate validity 
period (CASSANDRA-18951)
 + * Fix StorageService::constructRangeToEndpointMap for non-distributed 
keyspaces (CASSANDRA-19255)
 + * Group nodetool cms commands into single command group (CASSANDRA-19393)
 + * Register the measurements of the bootstrap process as Dropwizard metrics 
(CASSANDRA-19447)
 + * Add LIST SUPERUSERS CQL statement (CASSANDRA-19417)
 + * Modernize CQLSH datetime conversions (CASSANDRA-18879)
 + * Harry model and in-JVM tests for partition-restricted 2i queries 
(CASSANDRA-18275)
 + * Refactor cqlshmain global constants (CASSANDRA-19201)
 + * Remove native_transport_port_ssl (CASSANDRA-19397)
 + * Make nodetool reconfigurecms sync by default and add --cancel to be able 
to cancel ongoing reconfigurations (CASSANDRA-19216)
 + * Expose auth mode in system_views.clients, nodetool clientstats, metrics 
(CASSANDRA-19366)
 + * Remove sealed_periods and last_sealed_period tables (CASSANDRA-19189)
 + * Improve setup and initialisation of LocalLog/LogSpec (CASSANDRA-19271)
 + * Refactor structure of caching metrics and expose auth cache metrics via 
JMX (CASSANDRA-17062)
 + * Allow CQL client certificate authentication to work without sending an 
AUTHENTICATE request (CASSANDRA-18857)
 + * Extend nodetool tpstats and system_views.thread_pools with detailed pool 
parameters (CASSANDRA-19289)
 + * Remove dependency on Sigar in favor of OSHI (CASSANDRA-16565)
 + * Simplify the bind marker and Term logic (CASSANDRA-18813)
 + * Limit cassandra startup to supported JDKs, allow higher JDKs by setting 
CASSANDRA_JDK_UNSUPPORTED (CASSANDRA-18688)
 + * Standardize nodetool tablestats formatting of data units (CASSANDRA-19104)
 + * Make nodetool tablestats use number of significant digits for time and 
average values consistently (CASSANDRA-19015)
 + * Upgrade jackson to 2.15.3 and snakeyaml to 2.1 (CASSANDRA-18875)
 + * Transactional Cluster Metadata [CEP-21] (CASSANDRA-18330)
 + * Add ELAPSED command to cqlsh (CASSANDRA-18861)
 + * Add the ability to disable bulk loading of SSTables (CASSANDRA-18781)
 + * Clean up obsolete functions and simplify cql_version handling in cqlsh 
(CASSANDRA-18787)
 +Merged from 5.0:
+  * Optimize initial skipping logic for SAI queries on large partitions 
(CASSANDRA-20191)
 - * Fix reading mmapped trie-index exceeding 2GiB (CASSANDRA-20351)
   * zero copy streaming allocates direct memory that isn't used, but does help 
to fragment the memory space (CASSANDRA-20577)
   * CQLSSTableWriter supports setting the format (BTI or Big) (CASSANDRA-20609)
   * Don't allocate in ThreadLocalReadAheadBuffer#close() (CASSANDRA-20551)
diff --cc 
test/unit/org/apache/cassandra/index/sai/cql/IntraPartitionSkippingTest.java
index 0000000000,b9e42640e7..a6537920a9
mode 000000,100644..100644
--- 
a/test/unit/org/apache/cassandra/index/sai/cql/IntraPartitionSkippingTest.java
+++ 
b/test/unit/org/apache/cassandra/index/sai/cql/IntraPartitionSkippingTest.java
@@@ -1,0 -1,318 +1,318 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements.  See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership.  The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License.  You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+ 
+ package org.apache.cassandra.index.sai.cql;
+ 
+ import org.junit.Ignore;
+ import org.junit.Test;
+ 
+ import org.HdrHistogram.Histogram;
+ import org.apache.cassandra.index.sai.SAITester;
+ 
+ /**
+  * Tests for verifying intra-partition and partition-level skipping 
optimizations
+  * introduced in CASSANDRA-20191 for SAI.
+  * <p>
+  * These tests validate that Cassandra can efficiently skip over rows
+  * within a partition using clustering filters (name and slice), paging, 
reversed order,
+  * and sparse matches.
+  * <p>
+  * Each test documents a scenario where skipping logic is expected to apply, 
along with a few scenarios where it is not.
+  */
+ public class IntraPartitionSkippingTest extends SAITester
+ {
+     /**
+      * Writes ten rows (ck 0..9) into partition 1 and queries with an equality restriction on
+      * the clustering key ({@code ck = 5}) combined with an equality on the indexed column.
+      * Exactly the row (1, 5, 'val5') is expected; the assertion is run through
+      * {@code beforeAndAfterFlush}.
+      */
+     @Test
+     public void testNameFilterExactMatch() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, val text, PRIMARY KEY (pk, ck))");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int ck = 0; ck < 10; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck);
+         }
+ 
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND val = 'val5' ALLOW FILTERING"),
+                                              row(1, 5, "val5")));
+     }
+ 
+     /**
+      * Writes one hundred rows (ck 0..99) into partition 1 and queries with a clustering
+      * slice ({@code ck > 90}) combined with an equality on the indexed column.
+      * Only the row (1, 99, 'val99') is expected; the assertion is run through
+      * {@code beforeAndAfterFlush}.
+      */
+     @Test
+     public void testSliceFilterRangeMatch() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, val text, PRIMARY KEY (pk, ck))");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int ck = 0; ck < 100; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck);
+         }
+ 
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 90 AND val = 'val99' ALLOW FILTERING"),
+                                              row(1, 99, "val99")));
+     }
+ 
+     /**
+      * Same shape as the slice test, but on a table declared with
+      * {@code CLUSTERING ORDER BY (ck DESC)}. Twenty rows (ck 0..19) are written and the
+      * query restricts {@code ck < 10} together with the indexed column; only the row
+      * (1, 5, 'val5') is expected.
+      */
+     @Test
+     public void testReversedClustering() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, val text, PRIMARY KEY (pk, ck)) WITH CLUSTERING ORDER BY (ck DESC)");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int ck = 0; ck < 20; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck);
+         }
+ 
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck < 10 AND val = 'val5'  ALLOW FILTERING"),
+                                              row(1, 5, "val5")));
+     }
+ 
+     /**
+      * Writes one hundred rows (ck 0..99, val = 1000 + ck) into partition 1 and reads them
+      * back through {@code executeNetWithPaging} with a page size of 5, restricting both the
+      * clustering key ({@code ck > 90}) and the indexed column ({@code val > 1090}).
+      * The nine rows ck 91..99 are expected in ascending clustering order.
+      */
+     @Test
+     public void testSkippingWithPaging() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, val int, PRIMARY KEY (pk, ck))");
+ 
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int ck = 0; ck < 100; ck++)
+         {
+             int val = 1000 + ck;
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, val);
+         }
+ 
+         beforeAndAfterFlush(() -> assertRowsNet(executeNetWithPaging("SELECT * FROM %s WHERE pk = 1 AND ck > 90 AND val > 1090 ALLOW FILTERING", 5),
 -                                                row(1, 91, 1091),
 -                                                row(1, 92, 1092),
 -                                                row(1, 93, 1093),
 -                                                row(1, 94, 1094),
 -                                                row(1, 95, 1095),
 -                                                row(1, 96, 1096),
 -                                                row(1, 97, 1097),
 -                                                row(1, 98, 1098),
 -                                                row(1, 99, 1099)));
++                      row(1, 91, 1091),
++                      row(1, 92, 1092),
++                      row(1, 93, 1093),
++                      row(1, 94, 1094),
++                      row(1, 95, 1095),
++                      row(1, 96, 1096),
++                      row(1, 97, 1097),
++                      row(1, 98, 1098),
++                      row(1, 99, 1099)));
+     }
+ 
+     /**
+      * Uses a two-component clustering key (ck1, ck2). A 10 x 10 grid of rows is written
+      * into partition 1 with val = "v" + (ck1 * 10 + ck2), then the query pins both
+      * clustering components ({@code ck1 = 9 AND ck2 = 9}) together with the indexed column.
+      * Only the row (1, 9, 9, 'v99') is expected.
+      */
+     @Test
+     public void testCompositeClusteringKeySkipping() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck1 int, ck2 int, val text, PRIMARY KEY (pk, ck1, ck2))");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int ck1 = 0; ck1 < 10; ck1++)
+         {
+             for (int ck2 = 0; ck2 < 10; ck2++)
+             {
+                 execute("INSERT INTO %s (pk, ck1, ck2, val) VALUES (?, ?, ?, ?)", 1, ck1, ck2, "v" + (ck1 * 10 + ck2));
+             }
+         }
+ 
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck1 = 9 AND ck2 = 9 AND val = 'v99' ALLOW FILTERING"),
+                                              row(1, 9, 9, "v99")));
+     }
+ 
+     /**
+      * Writes one thousand rows (ck 0..999) into partition 1 where only every 450th row
+      * (ck 0, 450 and 900) carries the value "insert" and all others carry "skip".
+      * The query restricts {@code ck > 899} and {@code val = 'insert'}, so the single row
+      * (1, 900, 'insert') is expected.
+      */
+     @Test
+     public void testSparseMatch() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, val text, PRIMARY KEY (pk, ck))");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int ck = 0; ck < 1000; ck++)
+         {
+             String value = (ck % 450 == 0) ? "insert" : "skip";
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, value);
+         }
+ 
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 899 AND val = 'insert' ALLOW FILTERING"),
+                                              row(1, 900, "insert")));
+     }
+ 
+     /**
+      * Writes twenty rows (ck 0..19) into partition 1, all with the same indexed value
+      * "v5", and queries with an IN restriction on the clustering key
+      * ({@code ck IN (5, 10, 15)}). The three named rows are expected in clustering order.
+      */
+     @Test
+     public void testMultipleNameFilters() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, val text, PRIMARY KEY (pk, ck))");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int i = 0; i < 20; i++)
+         {
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, i, "v5");
+         }
+ 
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck IN (5, 10, 15) AND val = 'v5' ALLOW FILTERING"),
+                                              row(1, 5, "v5"), row(1, 10, "v5"), row(1, 15, "v5")));
+     }
+ 
+     /**
+      * Query without a partition-key restriction. Ten partitions (pk 0..9) of five rows each
+      * (ck 0..4) are written with val = "value" + pk; the query restricts only the indexed
+      * column and the clustering key ({@code ck > 2}). Rows (9, 3) and (9, 4) are expected.
+      */
+     // Multiple partition range scans won't skip
+     @Test
+     public void testPartitionRangeSkipping() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, val text, PRIMARY KEY (pk, ck))");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         for (int pk = 0; pk < 10; pk++)
+         {
+             for (int ck = 0; ck < 5; ck++)
+             {
+                 execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", pk, ck, "value" + pk);
+             }
+         }
+ 
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE val = 'value9' AND ck > 2 ALLOW FILTERING"),
+                                              row(9, 3, "value9"), row(9, 4, "value9")));
+     }
+ 
+     /**
+      * Table with a static column {@code s}. The static value is written once for
+      * partition 1, followed by two hundred regular rows (ck 0..199). Two queries are
+      * checked, one that additionally filters on the static column and one that does not;
+      * both are expected to return only the row (1, 101, 'static1', 'val101').
+      */
+     @Test
+     public void testStaticColumns() throws Throwable
+     {
+         createTable("CREATE TABLE %s (pk int, ck int, s text static, val text, PRIMARY KEY (pk, ck))");
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         execute("INSERT INTO %s (pk, s) VALUES (?, ?)", 1, "static1");
+ 
+         for (int ck = 0; ck < 200; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", 1, ck, "val" + ck);
+         }
+ 
+         // We will not skip
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 100 AND s = 'static1' AND val = 'val101' ALLOW FILTERING"),
+                                              row(1, 101, "static1", "val101")));
+ 
+         // we will skip
+         beforeAndAfterFlush(() -> assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck > 100  AND val = 'val101' ALLOW FILTERING"),
+                                              row(1, 101, "static1", "val101")));
+     }
+ 
+     /**
+      * Two partitions with a numeric indexed column: pk 1 holds ten rows with
+      * v = ck + 1000, pk 2 holds one hundred rows with v = ck. Both queries pin
+      * {@code pk = 1 AND ck = 5} and add a range on {@code v} (open-ended, then bounded);
+      * each is expected to return only the row (1, 5, 1005).
+      */
+     @Test
+     public void testNextKeyClusteringIndexNamesFilter() throws Throwable
+     {
+         createTable("CREATE TABLE %s (" +
+                     "pk int," +
+                     "ck int," +
+                     "v int," +
+                     "PRIMARY KEY (pk, ck))");
+ 
+         createIndex("CREATE INDEX ON %s(v) USING 'sai'");
+ 
+         int pk = 1;
+         for (int ck = 0; ck < 10; ck++)
+         {
+             int v = ck + 1000;
+             execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk, ck, v);
+         }
+ 
+         int pk1 = 2;
+         for (int ck = 0; ck < 100; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk1, ck, ck);
+         }
+ 
+         beforeAndAfterFlush(() -> {
 -            assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND v > 1004 ALLOW FILTERING"),
 -                       row(1, 5, 1005));
++                                assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND v > 1004 ALLOW FILTERING"),
++                                           row(1, 5, 1005));
+ 
 -            assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND v > 1004 AND v < 20000 ALLOW FILTERING"),
 -                       row(1, 5, 1005));
 -        });
++                                assertRows(execute("SELECT * FROM %s WHERE pk = 1 AND ck = 5 AND v > 1004 AND v < 20000 ALLOW FILTERING"),
++                                           row(1, 5, 1005));
++                            });
+     }
+ 
+     /**
+      * Manual latency measurement for a clustering-slice query ({@code ck > 9000}) against a
+      * 10,000-row partition; a second, 100-row partition is also populated. The query is
+      * executed 10,000 times, each duration (in nanoseconds) is recorded in an HdrHistogram,
+      * and the 50th/95th/99th percentiles are printed every 1,000 iterations.
+      * <p>
+      * {@code Histogram#getValueAtPercentile} takes the percentile on a 0-100 scale, so the
+      * arguments are 50.0 / 95.0 / 99.0 rather than fractions.
+      */
+     // Performance test case; it is ignored by default.
+     @Ignore ("performance test case for Index Slice filter.")
+     @Test
+     public void testNextKeyPerfClusteringIndexSliceFilter()
+     {
+         createTable("CREATE TABLE %s (" +
+                     "pk int, " +
+                     "ck int, " +
+                     "val text, " +
+                     "PRIMARY KEY (pk, ck))");
+ 
+         createIndex("CREATE INDEX ON %s(val) USING 'sai'");
+ 
+         int pk = 1;
+         for (int ck = 0; ck < 10000; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", pk, ck, "hello1");
+         }
+ 
+         int pk1 = 2;
+         for (int ck = 0; ck < 100; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, val) VALUES (?, ?, ?)", pk1, ck, "hello2");
+         }
+ 
+         Histogram histogram = new Histogram(4);
+ 
+         for (int i = 0; i < 10000; i++)
+         {
+             long start = System.nanoTime();
+             execute("SELECT * FROM %s WHERE pk = 1 AND ck > 9000 AND val = 'hello1' ALLOW FILTERING");
+             histogram.recordValue(System.nanoTime() - start);
+ 
+             if (i % 1000 == 0)
+             {
+                 System.out.println("50th: " + histogram.getValueAtPercentile(50.0));
+                 System.out.println("95th: " + histogram.getValueAtPercentile(95.0));
+                 System.out.println("99th: " + histogram.getValueAtPercentile(99.0));
+             }
+         }
+     }
+ 
+ 
+     /**
+      * Manual latency measurement for a clustering-equality query ({@code ck = 15000})
+      * against a 20,000-row partition with v = ck + 10; a second, 100-row partition is also
+      * populated. The query is executed 10,000 times, each duration (in nanoseconds) is
+      * recorded in an HdrHistogram, and the 50th/95th/99th percentiles are printed every
+      * 1,000 iterations.
+      * <p>
+      * {@code Histogram#getValueAtPercentile} takes the percentile on a 0-100 scale, so the
+      * arguments are 50.0 / 95.0 / 99.0 rather than fractions.
+      */
+     @Ignore ("performance test case for Index Names filter.")
+     @Test
+     public void testNextKeyPerfClusteringIndexNamesFilter()
+     {
+         createTable("CREATE TABLE %s (" +
+                     "pk int," +
+                     "ck int," +
+                     "v int," +
+                     "PRIMARY KEY (pk, ck))");
+ 
+         createIndex("CREATE INDEX ON %s(v) USING 'sai'");
+ 
+         int pk = 1;
+         for (int ck = 0; ck < 20000; ck++)
+         {
+             int v = ck + 10;
+             execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk, ck, v);
+         }
+ 
+         int pk1 = 2;
+         for (int ck = 0; ck < 100; ck++)
+         {
+             execute("INSERT INTO %s (pk, ck, v) VALUES (?, ?, ?)", pk1, ck, ck);
+         }
+ 
+         Histogram histogram = new Histogram(4);
+ 
+         for (int i = 0; i < 10000; i++)
+         {
+             long start = System.nanoTime();
+             execute("SELECT * FROM %s WHERE pk = 1 AND ck = 15000 AND v > 9000 ALLOW FILTERING");
+             histogram.recordValue(System.nanoTime() - start);
+ 
+             if (i % 1000 == 0)
+             {
+                 System.out.println("50th: " + histogram.getValueAtPercentile(50.0));
+                 System.out.println("95th: " + histogram.getValueAtPercentile(95.0));
+                 System.out.println("99th: " + histogram.getValueAtPercentile(99.0));
+             }
+         }
+     }
+ 
+ }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to