Follow-up question: it seems that range queries on the *second* field
of a CompositeType(UUIDType(), UUIDType()) do not work.
If I concatenate the two UUID.hex values into a 32-character string
instead of a CompositeType of two UUIDs, then range queries work
correctly.
This is illustrated below... so the question is: what is the point of a
CompositeType if range queries only work on the first field? Is it just a
convenience class for keeping things strongly typed and cleanly organized,
or did I break something in the way I set up CompositeType in the example
earlier in this thread?
def join_uuids(*uuids):
    """Return the ``hex`` attributes of *uuids* concatenated in order.

    Each argument must expose a ``hex`` attribute (as ``uuid.UUID`` does).
    Calling with no arguments returns the empty string.
    """
    return ''.join(u.hex for u in uuids)
def split_uuids(uuid_str):
    """Split a string of concatenated 32-character UUID hex forms.

    This is the inverse of joining ``uuid.UUID.hex`` values end to end.

    :param uuid_str: string whose length is a multiple of 32; each
        32-character chunk is parsed with ``uuid.UUID(hex=...)``.
    :returns: a list of ``uuid.UUID`` objects, in the order they appear
        in *uuid_str* (an empty list for an empty string).
    :raises ValueError: if the length is not a multiple of 32, or a
        chunk is not valid hexadecimal.
    """
    width = 32  # length of uuid.UUID.hex
    if len(uuid_str) % width:
        raise ValueError(
            'expected a multiple of %d hex characters, got %d'
            % (width, len(uuid_str)))
    # Build a real list (not a lazy map object) so callers can index the
    # result on both Python 2 and Python 3.
    return [uuid.UUID(hex=uuid_str[pos:pos + width])
            for pos in range(0, len(uuid_str), width)]
def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks or blocks.

    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx

    :param iterable: any iterable to be chunked.
    :param n: number of items per chunk.
    :param fillvalue: value used to pad the final chunk when the input
        length is not a multiple of *n*.
    :returns: an iterator of *n*-tuples.
    """
    # The function is named izip_longest on Python 2 and zip_longest on
    # Python 3; pick whichever this interpreter provides.
    try:
        zip_longest = itertools.zip_longest
    except AttributeError:
        zip_longest = itertools.izip_longest
    # n references to the *same* iterator: each tuple slot pulls the next
    # item, so consecutive items land in the same tuple.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
def _check_second_uuid_range_queries(cf):
    '''
    Insert joined-UUID column names under row 'inbound' of *cf* and assert
    that column slices bounded on the second UUID return the expected
    columns.
    '''
    u1, u2, u3, u4 = uuid.uuid1(), uuid.uuid1(), uuid.uuid1(), uuid.uuid1()
    cf.insert('inbound', {join_uuids(u1, u2): b''})
    cf.insert('inbound', {join_uuids(u1, u3): b''})
    cf.insert('inbound', {join_uuids(u1, u4): b''})

    ## test range searching: a window one integer either side of u3
    start = uuid.UUID(int=u3.int - 1)
    finish = uuid.UUID(int=u3.int + 1)
    assert start.int < u3.int < finish.int
    # list(...) so the result is indexable on Python 3 as well
    rec3 = list(cf.get('inbound',
                       column_start=join_uuids(u1, start),
                       column_finish=join_uuids(u1, finish)).items())
    assert len(rec3) == 1
    assert split_uuids(rec3[0][0])[1] == u3
    #### This assert above passes!

    #### NOTE (original author): the next part was reported as failing
    #### when the column name is a CompositeType; with joined hex keys
    #### the assertions below pass.

    ## now insert many rows -- enough that some should fall in each
    ## subrange below
    for _ in range(1000):
        cf.insert('inbound', {join_uuids(u1, uuid.uuid4()): b''})

    ## do four ranges, and expect more than zero in each
    step_size = 2**(128 - 2)  # one quarter of the 128-bit UUID space
    for i in range(2**2, 0, -1):
        start = uuid.UUID(int=(i - 1) * step_size)
        # clamp the top range: 2**128 itself is not a valid UUID int
        finish = uuid.UUID(int=min(i * step_size, 2**128 - 1))
        recs = list(cf.get('inbound',
                           column_start=join_uuids(u1, start),
                           column_finish=join_uuids(u1, finish)).items())
        for key, val in recs:
            key = split_uuids(key)
            assert val == b''
            assert key[0] == u1
            assert key[1] < finish
            ## this passes!! (fails with CompositeType...)
            assert start < key[1]
        assert len(recs) > 0
        # Single pre-formatted argument: prints the same text whether
        # `print` is the Python 2 statement or the Python 3 function.
        print('%d  for  %s %s' % (len(recs), start, finish))


def test_composite_column_names_second_level_range_query_with_decomposited_keys():
    '''
    check that we can execute range queries on the second part of a
    CompositeType column name after we unpack the composite key into a
    long string of concatenated hex forms of the UUIDs
    '''
    sm = SystemManager(chosen_server)
    try:
        sm.create_keyspace(namespace, SIMPLE_STRATEGY,
                           {'replication_factor': '1'})
        family = 'test'
        sm.create_column_family(
            namespace, family, super=False,
            key_validation_class=ASCII_TYPE,
            default_validation_class=BYTES_TYPE,
            comparator_type=UTF8Type(),
        )
        pool = ConnectionPool(namespace, config['storage_addresses'],
                              max_retries=1000, pool_timeout=10, pool_size=2,
                              timeout=120)
        try:
            cf = pycassa.ColumnFamily(pool, family)
            _check_second_uuid_range_queries(cf)
        finally:
            # release pooled connections even when an assertion fails
            pool.dispose()
    finally:
        # previously skipped whenever an assertion above raised
        sm.close()