This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch cleanup-before-5.0.0
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
The following commit(s) were added to refs/heads/cleanup-before-5.0.0 by this push:
new e921ed1 more cleanup
e921ed1 is described below
commit e921ed1a9bcb2287f52dd262de76e73e89063d9d
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Oct 31 16:55:01 2023 -0700
more cleanup
---
cpc/include/cpc_common.hpp | 24 +++++++-----------------
cpc/include/cpc_compressor.hpp | 11 +++++++----
cpc/include/cpc_compressor_impl.hpp | 34 +++++++++++++++++-----------------
cpc/include/cpc_sketch.hpp | 12 +++++-------
cpc/include/cpc_sketch_impl.hpp | 23 +++++++++++++----------
cpc/include/cpc_union.hpp | 9 ++++++---
cpc/include/cpc_union_impl.hpp | 16 ++++++++--------
cpc/include/u32_table.hpp | 5 +++--
cpc/include/u32_table_impl.hpp | 10 +++++-----
cpc/test/cpc_sketch_test.cpp | 8 ++++----
cpc/test/cpc_union_test.cpp | 8 ++++----
11 files changed, 79 insertions(+), 81 deletions(-)
diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp
index feb1e15..fdf06b5 100644
--- a/cpc/include/cpc_common.hpp
+++ b/cpc/include/cpc_common.hpp
@@ -32,39 +32,29 @@ namespace cpc_constants {
const uint8_t DEFAULT_LG_K = 11;
}
-// TODO: Redundant and deprecated. Will be removed in next major version
release.
-static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
-static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
-static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
-
-template<typename A> using AllocU8 = typename
std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using AllocU16 = typename
std::allocator_traits<A>::template rebind_alloc<uint16_t>;
-template<typename A> using AllocU32 = typename
std::allocator_traits<A>::template rebind_alloc<uint32_t>;
-template<typename A> using AllocU64 = typename
std::allocator_traits<A>::template rebind_alloc<uint64_t>;
-
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
-template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
-
// forward declaration
template<typename A> class u32_table;
template<typename A>
struct compressed_state {
+ using vector_u32 = std::vector<uint32_t, typename
std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
+
explicit compressed_state(const A& allocator): table_data(allocator),
table_data_words(0), table_num_entries(0),
window_data(allocator), window_data_words(0) {}
- vector_u32<A> table_data;
+ vector_u32 table_data;
uint32_t table_data_words;
uint32_t table_num_entries; // can be different from the number of entries
in the sketch in hybrid mode
- vector_u32<A> window_data;
+ vector_u32 window_data;
uint32_t window_data_words;
};
template<typename A>
struct uncompressed_state {
+ using vector_bytes = std::vector<uint8_t, typename
std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+
explicit uncompressed_state(const A& allocator): table(allocator),
window(allocator) {}
u32_table<A> table;
- vector_u8<A> window;
+ vector_bytes window;
};
} /* namespace datasketches */
diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp
index a8f426f..ffcf776 100644
--- a/cpc/include/cpc_compressor.hpp
+++ b/cpc/include/cpc_compressor.hpp
@@ -47,6 +47,9 @@ inline cpc_compressor<A>& get_compressor();
template<typename A>
class cpc_compressor {
public:
+ using vector_bytes = std::vector<uint8_t, typename
std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+ using vector_u32 = std::vector<uint32_t, typename
std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
+
void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>&
target) const;
void uncompress(const compressed_state<A>& source, uncompressed_state<A>&
target, uint8_t lg_k, uint32_t num_coupons) const;
@@ -126,17 +129,17 @@ private:
uint16_t* make_decoding_table(const uint16_t* encoding_table, unsigned
num_byte_values);
void validate_decoding_table(const uint16_t* decoding_table, const uint16_t*
encoding_table) const;
- void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k,
compressed_state<A>& result) const;
+ void compress_surprising_values(const vector_u32& pairs, uint8_t lg_k,
compressed_state<A>& result) const;
void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t
num_coupons, compressed_state<A>& target) const;
- vector_u32<A> uncompress_surprising_values(const uint32_t* data, uint32_t
data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
- void uncompress_sliding_window(const uint32_t* data, uint32_t data_words,
vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
+ vector_u32 uncompress_surprising_values(const uint32_t* data, uint32_t
data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
+ void uncompress_sliding_window(const uint32_t* data, uint32_t data_words,
vector_bytes& window, uint8_t lg_k, uint32_t num_coupons) const;
static size_t safe_length_for_compressed_pair_buf(uint32_t k, uint32_t
num_pairs, uint8_t num_base_bits);
static size_t safe_length_for_compressed_window_buf(uint32_t k);
static uint8_t determine_pseudo_phase(uint8_t lg_k, uint32_t c);
- static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t*
window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A&
allocator);
+ static inline vector_u32 tricky_get_pairs_from_window(const uint8_t* window,
uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A&
allocator);
static inline uint8_t golomb_choose_number_of_base_bits(uint32_t k, uint64_t
count);
};
diff --git a/cpc/include/cpc_compressor_impl.hpp
b/cpc/include/cpc_compressor_impl.hpp
index 7f323be..e1e75d3 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -183,7 +183,7 @@ void cpc_compressor<A>::uncompress(const
compressed_state<A>& source, uncompress
template<typename A>
void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>&
source, compressed_state<A>& result) const {
if (source.sliding_window.size() > 0) throw std::logic_error("unexpected
sliding window");
- vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+ vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
compress_surprising_values(pairs, source.get_lg_k(), result);
}
@@ -192,7 +192,7 @@ template<typename A>
void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>&
source, uncompressed_state<A>& target, uint8_t lg_k) const {
if (source.window_data.size() > 0) throw std::logic_error("unexpected
sliding window");
if (source.table_data.size() == 0) throw std::logic_error("table is
expected");
- vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(),
source.table_data_words, source.table_num_entries,
+ vector_u32 pairs = uncompress_surprising_values(source.table_data.data(),
source.table_data_words, source.table_num_entries,
lg_k, source.table_data.get_allocator());
target.table = u32_table<A>::make_from_pairs(pairs.data(),
source.table_num_entries, lg_k, pairs.get_allocator());
}
@@ -204,12 +204,12 @@ void cpc_compressor<A>::compress_hybrid_flavor(const
cpc_sketch_alloc<A>& source
if (source.sliding_window.size() == 0) throw std::logic_error("no sliding
window");
if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
const uint32_t k = 1 << source.get_lg_k();
- vector_u32<A> pairs_from_table =
source.surprising_value_table.unwrapping_get_items();
+ vector_u32 pairs_from_table =
source.surprising_value_table.unwrapping_get_items();
const uint32_t num_pairs_from_table =
static_cast<uint32_t>(pairs_from_table.size());
if (num_pairs_from_table > 0)
u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0,
num_pairs_from_table);
const uint32_t num_pairs_from_window = source.get_num_coupons() -
num_pairs_from_table; // because the window offset is zero
- vector_u32<A> all_pairs =
tricky_get_pairs_from_window(source.sliding_window.data(), k,
num_pairs_from_window, num_pairs_from_table, source.get_allocator());
+ vector_u32 all_pairs =
tricky_get_pairs_from_window(source.sliding_window.data(), k,
num_pairs_from_window, num_pairs_from_table, source.get_allocator());
u32_table<A>::merge(
pairs_from_table.data(), 0, pairs_from_table.size(),
@@ -224,7 +224,7 @@ template<typename A>
void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>&
source, uncompressed_state<A>& target, uint8_t lg_k) const {
if (source.window_data.size() > 0) throw std::logic_error("window is not
expected");
if (source.table_data.size() == 0) throw std::logic_error("table is
expected");
- vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(),
source.table_data_words, source.table_num_entries,
+ vector_u32 pairs = uncompress_surprising_values(source.table_data.data(),
source.table_data_words, source.table_num_entries,
lg_k, source.table_data.get_allocator());
// In the hybrid flavor, some of these pairs actually
@@ -250,7 +250,7 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const
compressed_state<A>& sour
template<typename A>
void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>&
source, compressed_state<A>& result) const {
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(),
source.get_num_coupons(), result);
- vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+ vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
if (pairs.size() > 0) {
// Here we subtract 8 from the column indices. Because they are stored in
the low 6 bits
// of each row_col pair, and because no column index is less than 8 for a
"Pinned" sketch,
@@ -277,7 +277,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const
compressed_state<A>& sour
target.table = u32_table<A>(2, 6 + lg_k,
source.table_data.get_allocator());
} else {
if (source.table_data.size() == 0) throw std::logic_error("table is
expected");
- vector_u32<A> pairs =
uncompress_surprising_values(source.table_data.data(), source.table_data_words,
num_pairs,
+ vector_u32 pairs = uncompress_surprising_values(source.table_data.data(),
source.table_data_words, num_pairs,
lg_k, source.table_data.get_allocator());
// undo the compressor's 8-column shift
for (uint32_t i = 0; i < num_pairs; i++) {
@@ -291,7 +291,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const
compressed_state<A>& sour
template<typename A>
void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>&
source, compressed_state<A>& result) const {
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(),
source.get_num_coupons(), result);
- vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+ vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
if (pairs.size() > 0) {
// Here we apply a complicated transformation to the column indices, which
// changes the implied ordering of the pairs, so we must do it before
sorting.
@@ -330,7 +330,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const
compressed_state<A>& sou
target.table = u32_table<A>(2, 6 + lg_k,
source.table_data.get_allocator());
} else {
if (source.table_data.size() == 0) throw std::logic_error("table is
expected");
- vector_u32<A> pairs =
uncompress_surprising_values(source.table_data.data(), source.table_data_words,
num_pairs,
+ vector_u32 pairs = uncompress_surprising_values(source.table_data.data(),
source.table_data_words, num_pairs,
lg_k, source.table_data.get_allocator());
const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -356,7 +356,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const
compressed_state<A>& sou
}
template<typename A>
-void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs,
uint8_t lg_k, compressed_state<A>& result) const {
+void cpc_compressor<A>::compress_surprising_values(const vector_u32& pairs,
uint8_t lg_k, compressed_state<A>& result) const {
const uint32_t k = 1 << lg_k;
const uint32_t num_pairs = static_cast<uint32_t>(pairs.size());
const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k +
num_pairs, num_pairs);
@@ -374,10 +374,10 @@ void cpc_compressor<A>::compress_surprising_values(const
vector_u32<A>& pairs, u
}
template<typename A>
-vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t*
data, uint32_t data_words, uint32_t num_pairs,
- uint8_t lg_k, const A& allocator) const {
+auto cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data,
uint32_t data_words, uint32_t num_pairs,
+ uint8_t lg_k, const A& allocator) const -> vector_u32 {
const uint32_t k = 1 << lg_k;
- vector_u32<A> pairs(num_pairs, 0, allocator);
+ vector_u32 pairs(num_pairs, 0, allocator);
const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k +
num_pairs, num_pairs);
low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data,
data_words);
return pairs;
@@ -399,7 +399,7 @@ void cpc_compressor<A>::compress_sliding_window(const
uint8_t* window, uint8_t l
}
template<typename A>
-void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data,
uint32_t data_words, vector_u8<A>& window,
+void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data,
uint32_t data_words, vector_bytes& window,
uint8_t lg_k, uint32_t num_coupons) const {
const uint32_t k = 1 << lg_k;
window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
@@ -722,10 +722,10 @@ void write_unary(
// The empty space that this leaves at the beginning of the output array
// will be filled in later by the caller.
template<typename A>
-vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t*
window, uint32_t k, uint32_t num_pairs_to_get,
- uint32_t empty_space, const A& allocator) {
+auto cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window,
uint32_t k, uint32_t num_pairs_to_get,
+ uint32_t empty_space, const A& allocator) -> vector_u32 {
const size_t output_length = empty_space + num_pairs_to_get;
- vector_u32<A> pairs(output_length, 0, allocator);
+ vector_u32 pairs(output_length, 0, allocator);
size_t pair_index = empty_space;
for (unsigned row_index = 0; row_index < k; row_index++) {
uint8_t byte = window[row_index];
diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp
index 77b2a35..b35e528 100644
--- a/cpc/include/cpc_sketch.hpp
+++ b/cpc/include/cpc_sketch.hpp
@@ -64,6 +64,8 @@ template<typename A>
class cpc_sketch_alloc {
public:
using allocator_type = A;
+ using vector_bytes = std::vector<uint8_t, typename
std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+ using vector_u64 = std::vector<uint64_t, typename
std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
/**
* Creates an instance of the sketch given the lg_k parameter and hash seed.
@@ -204,10 +206,6 @@ public:
*/
void serialize(std::ostream& os) const;
- // This is a convenience alias for users
- // The type returned by the following serialize method
- using vector_bytes = vector_u8<A>;
-
/**
* This method serializes the sketch as a vector of bytes.
* An optional header can be reserved in front of the sketch.
@@ -278,7 +276,7 @@ private:
uint32_t num_coupons; // the number of coupons collected so far
u32_table<A> surprising_value_table;
- vector_u8<A> sliding_window;
+ vector_bytes sliding_window;
uint8_t window_offset; // derivable from num_coupons, but made explicit for
speed
uint8_t first_interesting_column; // This is part of a speed optimization
@@ -287,7 +285,7 @@ private:
// for deserialization and cpc_union::get_result()
cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t
first_interesting_column, u32_table<A>&& table,
- vector_u8<A>&& window, bool has_hip, double kxp, double hip_est_accum,
uint64_t seed);
+ vector_bytes&& window, bool has_hip, double kxp, double hip_est_accum,
uint64_t seed);
inline void row_col_update(uint32_t row_col);
inline void update_sparse(uint32_t row_col);
@@ -310,7 +308,7 @@ private:
static inline uint8_t determine_correct_offset(uint8_t lg_k, uint64_t c);
// this produces a full-size k-by-64 bit matrix
- vector_u64<A> build_bit_matrix() const;
+ vector_u64 build_bit_matrix() const;
static uint8_t get_preamble_ints(uint32_t num_coupons, bool has_hip, bool
has_table, bool has_window);
inline void write_hip(std::ostream& os) const;
diff --git a/cpc/include/cpc_sketch_impl.hpp b/cpc/include/cpc_sketch_impl.hpp
index c5f467e..84709cd 100644
--- a/cpc/include/cpc_sketch_impl.hpp
+++ b/cpc/include/cpc_sketch_impl.hpp
@@ -315,7 +315,7 @@ void cpc_sketch_alloc<A>::move_window() {
const uint32_t k = 1 << lg_k;
// Construct the full-sized bit matrix that corresponds to the sketch
- vector_u64<A> bit_matrix = build_bit_matrix();
+ vector_u64 bit_matrix = build_bit_matrix();
// refresh the KXP register on every 8th window shift.
if ((new_offset & 0x7) == 0) refresh_kxp(bit_matrix.data());
@@ -458,7 +458,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const
{
}
template<typename A>
-vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
+auto cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const ->
vector_bytes {
compressed_state<A> compressed(sliding_window.get_allocator());
compressed.table_data_words = 0;
compressed.table_num_entries = 0;
@@ -469,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned
header_size_bytes) const {
const bool has_window = compressed.window_data.size() > 0;
const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip,
has_table, has_window);
const size_t size = header_size_bytes + (preamble_ints +
compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
- vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
+ vector_bytes bytes(size, 0, sliding_window.get_allocator());
uint8_t* ptr = bytes.data() + header_size_bytes;
ptr += copy_to_mem(preamble_ints, ptr);
const uint8_t serial_version = SERIAL_VERSION;
@@ -712,15 +712,18 @@ static const size_t CPC_MAX_PREAMBLE_SIZE_BYTES = 40;
template<typename A>
size_t cpc_sketch_alloc<A>::get_max_serialized_size_bytes(uint8_t lg_k) {
check_lg_k(lg_k);
- if (lg_k <= CPC_EMPIRICAL_SIZE_MAX_LGK) return
CPC_EMPIRICAL_MAX_SIZE_BYTES[lg_k - CPC_MIN_LG_K] + CPC_MAX_PREAMBLE_SIZE_BYTES;
+ if (lg_k <= CPC_EMPIRICAL_SIZE_MAX_LGK) {
+ return CPC_EMPIRICAL_MAX_SIZE_BYTES[lg_k - cpc_constants::MIN_LG_K] +
CPC_MAX_PREAMBLE_SIZE_BYTES;
+ }
const uint32_t k = 1 << lg_k;
return (int) (CPC_EMPIRICAL_MAX_SIZE_FACTOR * k) +
CPC_MAX_PREAMBLE_SIZE_BYTES;
}
template<typename A>
void cpc_sketch_alloc<A>::check_lg_k(uint8_t lg_k) {
- if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
- throw std::invalid_argument("lg_k must be >= " +
std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": "
+ std::to_string(lg_k));
+ if (lg_k < cpc_constants::MIN_LG_K || lg_k > cpc_constants::MAX_LG_K) {
+ throw std::invalid_argument("lg_k must be >= " +
std::to_string(cpc_constants::MIN_LG_K) + " and <= "
+ + std::to_string(cpc_constants::MAX_LG_K) + ": " +
std::to_string(lg_k));
}
}
@@ -731,14 +734,14 @@ uint32_t cpc_sketch_alloc<A>::get_num_coupons() const {
template<typename A>
bool cpc_sketch_alloc<A>::validate() const {
- vector_u64<A> bit_matrix = build_bit_matrix();
+ vector_u64 bit_matrix = build_bit_matrix();
const uint64_t num_bits_set = count_bits_set_in_matrix(bit_matrix.data(),
1ULL << lg_k);
return num_bits_set == num_coupons;
}
template<typename A>
cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons,
uint8_t first_interesting_column,
- u32_table<A>&& table, vector_u8<A>&& window, bool has_hip, double kxp,
double hip_est_accum, uint64_t seed):
+ u32_table<A>&& table, vector_bytes&& window, bool has_hip, double kxp,
double hip_est_accum, uint64_t seed):
lg_k(lg_k),
seed(seed),
was_merged(!has_hip),
@@ -800,14 +803,14 @@ uint8_t
cpc_sketch_alloc<A>::determine_correct_offset(uint8_t lg_k, uint64_t c)
}
template<typename A>
-vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
+auto cpc_sketch_alloc<A>::build_bit_matrix() const -> vector_u64 {
const uint32_t k = 1 << lg_k;
if (window_offset > 56) throw std::logic_error("offset > 56");
// Fill the matrix with default rows in which the "early zone" is filled
with ones.
// This is essential for the routine's O(k) time cost (as opposed to O(C)).
const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
- vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
+ vector_u64 matrix(k, default_row, sliding_window.get_allocator());
if (num_coupons == 0) return matrix;
diff --git a/cpc/include/cpc_union.hpp b/cpc/include/cpc_union.hpp
index f380fb7..08341dd 100644
--- a/cpc/include/cpc_union.hpp
+++ b/cpc/include/cpc_union.hpp
@@ -39,6 +39,9 @@ using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
template<typename A>
class cpc_union_alloc {
public:
+ using vector_bytes = std::vector<uint8_t, typename
std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+ using vector_u64 = std::vector<uint64_t, typename
std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
+
/**
* Creates an instance of the union given the lg_k parameter and hash seed.
* @param lg_k base 2 logarithm of the number of bins in the sketch
@@ -101,7 +104,7 @@ private:
uint8_t lg_k;
uint64_t seed;
cpc_sketch_alloc<A>* accumulator;
- vector_u64<A> bit_matrix;
+ vector_u64 bit_matrix;
template<typename S> void internal_update(S&& sketch); // to support both
rvalue and lvalue
@@ -111,8 +114,8 @@ private:
void switch_to_bit_matrix();
void walk_table_updating_sketch(const u32_table<A>& table);
void or_table_into_matrix(const u32_table<A>& table);
- void or_window_into_matrix(const vector_u8<A>& sliding_window, uint8_t
offset, uint8_t src_lg_k);
- void or_matrix_into_matrix(const vector_u64<A>& src_matrix, uint8_t
src_lg_k);
+ void or_window_into_matrix(const vector_bytes& sliding_window, uint8_t
offset, uint8_t src_lg_k);
+ void or_matrix_into_matrix(const vector_u64& src_matrix, uint8_t src_lg_k);
void reduce_k(uint8_t new_lg_k);
};
diff --git a/cpc/include/cpc_union_impl.hpp b/cpc/include/cpc_union_impl.hpp
index 5865cf5..f277107 100644
--- a/cpc/include/cpc_union_impl.hpp
+++ b/cpc/include/cpc_union_impl.hpp
@@ -33,8 +33,8 @@ seed(seed),
accumulator(nullptr),
bit_matrix(allocator)
{
- if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
- throw std::invalid_argument("lg_k must be >= " +
std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": "
+ std::to_string(lg_k));
+ if (lg_k < cpc_constants::MIN_LG_K || lg_k > cpc_constants::MAX_LG_K) {
+ throw std::invalid_argument("lg_k must be >= " +
std::to_string(cpc_constants::MIN_LG_K) + " and <= " +
std::to_string(cpc_constants::MAX_LG_K) + ": " + std::to_string(lg_k));
}
accumulator = new (AllocCpc(allocator).allocate(1))
cpc_sketch_alloc<A>(lg_k, seed, allocator);
}
@@ -166,7 +166,7 @@ void cpc_union_alloc<A>::internal_update(S&& sketch) {
// SLIDING mode involves inverted logic, so we can't just walk the source
sketch.
// Instead, we convert it to a bitMatrix that can be OR'ed into the
destination.
if (cpc_sketch_alloc<A>::flavor::SLIDING != src_flavor) throw
std::logic_error("wrong flavor"); // Case D
- vector_u64<A> src_matrix = sketch.build_bit_matrix();
+ vector_u64 src_matrix = sketch.build_bit_matrix();
or_matrix_into_matrix(src_matrix, sketch.get_lg_k());
}
@@ -203,7 +203,7 @@ cpc_sketch_alloc<A>
cpc_union_alloc<A>::get_result_from_bit_matrix() const {
const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k,
num_coupons);
- vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
+ vector_bytes sliding_window(k, 0, bit_matrix.get_allocator());
// don't need to zero the window's memory
// dynamically growing caused snowplow effect
@@ -289,7 +289,7 @@ void cpc_union_alloc<A>::or_table_into_matrix(const
u32_table<A>& table) {
}
template<typename A>
-void cpc_union_alloc<A>::or_window_into_matrix(const vector_u8<A>&
sliding_window, uint8_t offset, uint8_t src_lg_k) {
+void cpc_union_alloc<A>::or_window_into_matrix(const vector_bytes&
sliding_window, uint8_t offset, uint8_t src_lg_k) {
if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src
LgK
const uint32_t src_k = 1 << src_lg_k;
@@ -299,7 +299,7 @@ void cpc_union_alloc<A>::or_window_into_matrix(const
vector_u8<A>& sliding_windo
}
template<typename A>
-void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64<A>&
src_matrix, uint8_t src_lg_k) {
+void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64& src_matrix,
uint8_t src_lg_k) {
if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src
LgK
const uint32_t src_k = 1 << src_lg_k;
@@ -315,10 +315,10 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
if (bit_matrix.size() > 0) { // downsample the unioner's bit matrix
if (accumulator != nullptr) throw std::logic_error("accumulator is not
null");
- vector_u64<A> old_matrix = std::move(bit_matrix);
+ vector_u64 old_matrix = std::move(bit_matrix);
const uint8_t old_lg_k = lg_k;
const uint32_t new_k = 1 << new_lg_k;
- bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
+ bit_matrix = vector_u64(new_k, 0, old_matrix.get_allocator());
lg_k = new_lg_k;
or_matrix_into_matrix(old_matrix, old_lg_k);
return;
diff --git a/cpc/include/u32_table.hpp b/cpc/include/u32_table.hpp
index a344a17..afdea83 100644
--- a/cpc/include/u32_table.hpp
+++ b/cpc/include/u32_table.hpp
@@ -38,6 +38,7 @@ static const uint32_t U32_TABLE_DOWNSIZE_DENOM = 4LL;
template<typename A>
class u32_table {
public:
+ using vector_u32 = std::vector<uint32_t, typename
std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
u32_table(const A& allocator);
u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
@@ -54,7 +55,7 @@ public:
static u32_table make_from_pairs(const uint32_t* pairs, uint32_t num_pairs,
uint8_t lg_k, const A& allocator);
- vector_u32<A> unwrapping_get_items() const;
+ vector_u32 unwrapping_get_items() const;
static void merge(
const uint32_t* arr_a, size_t start_a, size_t length_a, // input
@@ -70,7 +71,7 @@ private:
uint8_t lg_size; // log2 of number of slots
uint8_t num_valid_bits;
uint32_t num_items;
- vector_u32<A> slots;
+ vector_u32 slots;
inline uint32_t lookup(uint32_t item) const;
inline void must_insert(uint32_t item);
diff --git a/cpc/include/u32_table_impl.hpp b/cpc/include/u32_table_impl.hpp
index a82e7de..62cd7da 100644
--- a/cpc/include/u32_table_impl.hpp
+++ b/cpc/include/u32_table_impl.hpp
@@ -151,8 +151,8 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
const uint32_t old_size = 1 << lg_size;
const uint32_t new_size = 1 << new_lg_size;
if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
- vector_u32<A> old_slots = std::move(slots);
- slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
+ vector_u32 old_slots = std::move(slots);
+ slots = vector_u32(new_size, UINT32_MAX, old_slots.get_allocator());
lg_size = new_lg_size;
for (uint32_t i = 0; i < old_size; i++) {
if (old_slots[i] != UINT32_MAX) {
@@ -168,10 +168,10 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
// and even then the subsequent sort would fix things up.
// The result is nearly sorted, so make sure to use an efficient sort for that
case
template<typename A>
-vector_u32<A> u32_table<A>::unwrapping_get_items() const {
- if (num_items == 0) return vector_u32<A>(slots.get_allocator());
+auto u32_table<A>::unwrapping_get_items() const -> vector_u32 {
+ if (num_items == 0) return vector_u32(slots.get_allocator());
const uint32_t table_size = 1 << lg_size;
- vector_u32<A> result(num_items, 0, slots.get_allocator());
+ vector_u32 result(num_items, 0, slots.get_allocator());
size_t i = 0;
size_t l = 0;
size_t r = num_items - 1;
diff --git a/cpc/test/cpc_sketch_test.cpp b/cpc/test/cpc_sketch_test.cpp
index fd9bcf3..e38d45c 100644
--- a/cpc/test/cpc_sketch_test.cpp
+++ b/cpc/test/cpc_sketch_test.cpp
@@ -32,10 +32,10 @@ namespace datasketches {
static const double RELATIVE_ERROR_FOR_LG_K_11 = 0.02;
TEST_CASE("cpc sketch: lg k limits", "[cpc_sketch]") {
- cpc_sketch s1(CPC_MIN_LG_K); // this should work
- cpc_sketch s2(CPC_MAX_LG_K); // this should work
- REQUIRE_THROWS_AS(cpc_sketch(CPC_MIN_LG_K - 1), std::invalid_argument);
- REQUIRE_THROWS_AS(cpc_sketch(CPC_MAX_LG_K + 1), std::invalid_argument);
+ cpc_sketch s1(cpc_constants::MIN_LG_K); // this should work
+ cpc_sketch s2(cpc_constants::MAX_LG_K); // this should work
+ REQUIRE_THROWS_AS(cpc_sketch(cpc_constants::MIN_LG_K - 1),
std::invalid_argument);
+ REQUIRE_THROWS_AS(cpc_sketch(cpc_constants::MAX_LG_K + 1),
std::invalid_argument);
}
TEST_CASE("cpc sketch: empty", "[cpc_sketch]") {
diff --git a/cpc/test/cpc_union_test.cpp b/cpc/test/cpc_union_test.cpp
index 542ae06..6a03475 100644
--- a/cpc/test/cpc_union_test.cpp
+++ b/cpc/test/cpc_union_test.cpp
@@ -28,10 +28,10 @@ namespace datasketches {
static const double RELATIVE_ERROR_FOR_LG_K_11 = 0.02;
TEST_CASE("cpc union: lg k limits", "[cpc_union]") {
- cpc_union u1(CPC_MIN_LG_K); // this should work
- cpc_union u2(CPC_MAX_LG_K); // this should work
- REQUIRE_THROWS_AS(cpc_union(CPC_MIN_LG_K - 1), std::invalid_argument);
- REQUIRE_THROWS_AS(cpc_union(CPC_MAX_LG_K + 1), std::invalid_argument);
+ cpc_union u1(cpc_constants::MIN_LG_K); // this should work
+ cpc_union u2(cpc_constants::MAX_LG_K); // this should work
+ REQUIRE_THROWS_AS(cpc_union(cpc_constants::MIN_LG_K - 1),
std::invalid_argument);
+ REQUIRE_THROWS_AS(cpc_union(cpc_constants::MAX_LG_K + 1),
std::invalid_argument);
}
TEST_CASE("cpc union: empty", "[cpc_union]") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]