This is an automated email from the ASF dual-hosted git repository.
placave pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 7ff21fc refactor: use new codec utils in TDigest and Frequencies
sketches (#52)
7ff21fc is described below
commit 7ff21fc004062b16d9899af2ac0adee38bc9780f
Author: tison <[email protected]>
AuthorDate: Wed Dec 31 22:05:27 2025 +0800
refactor: use new codec utils in TDigest and Frequencies sketches (#52)
* refactor: use new codec utils in TDigest and Frequencies sketches
Signed-off-by: tison <[email protected]>
* for frequencies
Signed-off-by: tison <[email protected]>
* fixup
Signed-off-by: tison <[email protected]>
* simplify code
Signed-off-by: tison <[email protected]>
---------
Signed-off-by: tison <[email protected]>
---
datasketches/src/frequencies/mod.rs | 1 -
datasketches/src/frequencies/serde.rs | 98 ----------------
datasketches/src/frequencies/serialization.rs | 121 ++++++++++----------
datasketches/src/frequencies/sketch.rs | 156 ++++++++++++++------------
datasketches/src/hll/array4.rs | 2 +-
datasketches/src/hll/array6.rs | 2 +-
datasketches/src/hll/array8.rs | 2 +-
datasketches/src/hll/hash_set.rs | 2 +-
datasketches/src/hll/list.rs | 2 +-
datasketches/src/hll/serialization.rs | 2 +-
datasketches/src/hll/sketch.rs | 4 +-
datasketches/src/tdigest/sketch.rs | 35 +++---
12 files changed, 169 insertions(+), 258 deletions(-)
diff --git a/datasketches/src/frequencies/mod.rs
b/datasketches/src/frequencies/mod.rs
index d04343a..e461b61 100644
--- a/datasketches/src/frequencies/mod.rs
+++ b/datasketches/src/frequencies/mod.rs
@@ -25,7 +25,6 @@
//!
<https://apache.github.io/datasketches-java/9.0.0/org/apache/datasketches/frequencies/FrequentItemsSketch.html>
mod reverse_purge_item_hash_map;
-mod serde;
mod serialization;
mod sketch;
diff --git a/datasketches/src/frequencies/serde.rs
b/datasketches/src/frequencies/serde.rs
deleted file mode 100644
index a44e6b0..0000000
--- a/datasketches/src/frequencies/serde.rs
+++ /dev/null
@@ -1,98 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Serialization helpers for frequent items sketches.
-
-use std::str;
-
-use crate::error::Error;
-use crate::frequencies::serialization::read_i64_le;
-use crate::frequencies::serialization::read_u32_le;
-
-pub(crate) fn serialize_string_items(items: &[String]) -> Vec<u8> {
- let total_len: usize = items.iter().map(|item| 4 + item.len()).sum();
- let mut out = Vec::with_capacity(total_len);
- for item in items {
- let bytes = item.as_bytes();
- let len = bytes.len() as u32;
- out.extend_from_slice(&len.to_le_bytes());
- out.extend_from_slice(bytes);
- }
- out
-}
-
-pub(crate) fn deserialize_string_items(
- bytes: &[u8],
- num_items: usize,
-) -> Result<(Vec<String>, usize), Error> {
- if num_items == 0 {
- return Ok((Vec::new(), 0));
- }
- let mut items = Vec::with_capacity(num_items);
- let mut offset = 0usize;
- for _ in 0..num_items {
- if offset + 4 > bytes.len() {
- return Err(Error::insufficient_data(
- "not enough bytes for string length",
- ));
- }
- let len = read_u32_le(bytes, offset) as usize;
- offset += 4;
- if offset + len > bytes.len() {
- return Err(Error::insufficient_data(
- "not enough bytes for string payload",
- ));
- }
- let slice = &bytes[offset..offset + len];
- let value = match str::from_utf8(slice) {
- Ok(s) => s.to_string(),
- Err(_) => {
- return Err(Error::deserial("invalid UTF-8 string payload"));
- }
- };
- items.push(value);
- offset += len;
- }
- Ok((items, offset))
-}
-
-pub(crate) fn serialize_i64_items(items: &[i64]) -> Vec<u8> {
- let mut out = Vec::with_capacity(items.len() * 8);
- for item in items {
- out.extend_from_slice(&item.to_le_bytes());
- }
- out
-}
-
-pub(crate) fn deserialize_i64_items(
- bytes: &[u8],
- num_items: usize,
-) -> Result<(Vec<i64>, usize), Error> {
- let needed = num_items
- .checked_mul(8)
- .ok_or_else(|| Error::deserial("items size overflow"))?;
- if bytes.len() < needed {
- return Err(Error::insufficient_data("not enough bytes for i64 items"));
- }
- let mut items = Vec::with_capacity(num_items);
- for i in 0..num_items {
- let offset = i * 8;
- let value = read_i64_le(bytes, offset);
- items.push(value);
- }
- Ok((items, needed))
-}
diff --git a/datasketches/src/frequencies/serialization.rs
b/datasketches/src/frequencies/serialization.rs
index 43003c9..3f8600b 100644
--- a/datasketches/src/frequencies/serialization.rs
+++ b/datasketches/src/frequencies/serialization.rs
@@ -15,12 +15,14 @@
// specific language governing permissions and limitations
// under the License.
-//! Serialization constants and helpers for frequency sketches.
+use crate::codec::SketchBytes;
+use crate::codec::SketchSlice;
+use crate::error::Error;
/// Family ID for frequency sketches.
pub const FAMILY_ID: u8 = 10;
/// Serialization version.
-pub const SER_VER: u8 = 1;
+pub const SERIAL_VERSION: u8 = 1;
/// Preamble longs for empty sketch.
pub const PREAMBLE_LONGS_EMPTY: u8 = 1;
@@ -30,75 +32,66 @@ pub const PREAMBLE_LONGS_NONEMPTY: u8 = 4;
/// Empty flag mask (both bits for compatibility).
pub const EMPTY_FLAG_MASK: u8 = 5;
-/// Offset of preamble longs byte.
-pub const PREAMBLE_LONGS_BYTE: usize = 0;
-/// Offset of serialization version byte.
-pub const SER_VER_BYTE: usize = 1;
-/// Offset of family ID byte.
-pub const FAMILY_BYTE: usize = 2;
-/// Offset of lg_max_map_size byte.
-pub const LG_MAX_MAP_SIZE_BYTE: usize = 3;
-/// Offset of lg_cur_map_size byte.
-pub const LG_CUR_MAP_SIZE_BYTE: usize = 4;
-/// Offset of flags byte.
-pub const FLAGS_BYTE: usize = 5;
-
-/// Offset of active items int (low 32 bits of second pre-long).
-pub const ACTIVE_ITEMS_INT: usize = 8;
-/// Offset of stream weight (third pre-long).
-pub const STREAM_WEIGHT_LONG: usize = 16;
-/// Offset of offset (fourth pre-long).
-pub const OFFSET_LONG: usize = 24;
+pub(crate) fn count_string_items_bytes(items: &[String]) -> usize {
+ items.iter().map(|item| 4 + item.len()).sum()
+}
-/// Read an u32 value from bytes at the given offset (little-endian).
-#[inline]
-pub fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
- u32::from_le_bytes([
- bytes[offset],
- bytes[offset + 1],
- bytes[offset + 2],
- bytes[offset + 3],
- ])
+pub(crate) fn serialize_string_items(bytes: &mut SketchBytes, items:
&[String]) {
+ for item in items {
+ let bs = item.as_bytes();
+ bytes.write_u32_le(bs.len() as u32);
+ bytes.write(bs);
+ }
}
-/// Read an i64 value from bytes at the given offset (little-endian).
-#[inline]
-pub fn read_i64_le(bytes: &[u8], offset: usize) -> i64 {
- i64::from_le_bytes([
- bytes[offset],
- bytes[offset + 1],
- bytes[offset + 2],
- bytes[offset + 3],
- bytes[offset + 4],
- bytes[offset + 5],
- bytes[offset + 6],
- bytes[offset + 7],
- ])
+pub(crate) fn deserialize_string_items(
+ mut cursor: SketchSlice<'_>,
+ num_items: usize,
+) -> Result<Vec<String>, Error> {
+ let mut items = Vec::with_capacity(num_items);
+ for i in 0..num_items {
+ let len = cursor.read_u32_le().map_err(|_| {
+ Error::insufficient_data(format!(
+ "expected {num_items} string items, failed to read len at
index {i}"
+ ))
+ })?;
+
+ let mut slice = vec![0; len as usize];
+ cursor.read_exact(&mut slice).map_err(|_| {
+ Error::insufficient_data(format!(
+ "expected {num_items} string items, failed to read slice at
index {i}"
+ ))
+ })?;
+
+ let value = String::from_utf8(slice)
+ .map_err(|_| Error::deserial(format!("invalid UTF-8 string payload
at index {i}")))?;
+ items.push(value);
+ }
+ Ok(items)
}
-/// Read an u64 value from bytes at the given offset (little-endian).
-#[inline]
-pub fn read_u64_le(bytes: &[u8], offset: usize) -> u64 {
- u64::from_le_bytes([
- bytes[offset],
- bytes[offset + 1],
- bytes[offset + 2],
- bytes[offset + 3],
- bytes[offset + 4],
- bytes[offset + 5],
- bytes[offset + 6],
- bytes[offset + 7],
- ])
+pub(crate) fn count_i64_items_bytes(items: &[i64]) -> usize {
+ items.len() * 8
}
-/// Write a u32 value to bytes at the given offset (little-endian).
-#[inline]
-pub fn write_u32_le(bytes: &mut [u8], offset: usize, value: u32) {
- bytes[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
+pub(crate) fn serialize_i64_items(bytes: &mut SketchBytes, items: &[i64]) {
+ for item in items.iter().copied() {
+ bytes.write_i64_le(item);
+ }
}
-/// Write an u64 value to bytes at the given offset (little-endian).
-#[inline]
-pub fn write_u64_le(bytes: &mut [u8], offset: usize, value: u64) {
- bytes[offset..offset + 8].copy_from_slice(&value.to_le_bytes());
+pub(crate) fn deserialize_i64_items(
+ mut cursor: SketchSlice<'_>,
+ num_items: usize,
+) -> Result<Vec<i64>, Error> {
+ let mut items = Vec::with_capacity(num_items);
+ for i in 0..num_items {
+ let value = cursor.read_i64_le().map_err(|_| {
+ Error::insufficient_data(format!(
+ "expected {num_items} i64 items, failed at index {i}"
+ ))
+ })?;
+ items.push(value);
+ }
+ Ok(items)
}
diff --git a/datasketches/src/frequencies/sketch.rs
b/datasketches/src/frequencies/sketch.rs
index ee4552b..28f3325 100644
--- a/datasketches/src/frequencies/sketch.rs
+++ b/datasketches/src/frequencies/sketch.rs
@@ -19,15 +19,15 @@
use std::hash::Hash;
+use crate::codec::SketchBytes;
+use crate::codec::SketchSlice;
use crate::error::Error;
use crate::frequencies::reverse_purge_item_hash_map::ReversePurgeItemHashMap;
-use crate::frequencies::serde::deserialize_i64_items;
-use crate::frequencies::serde::deserialize_string_items;
-use crate::frequencies::serde::serialize_i64_items;
-use crate::frequencies::serde::serialize_string_items;
use crate::frequencies::serialization::*;
-type DeserializeItems<T> = fn(&[u8], usize) -> Result<(Vec<T>, usize), Error>;
+type CountSerializeSize<T> = fn(&[T]) -> usize;
+type SerializeItems<T> = fn(&mut SketchBytes, &[T]);
+type DeserializeItems<T> = fn(SketchSlice<'_>, usize) -> Result<Vec<T>, Error>;
const LG_MIN_MAP_SIZE: u8 = 3;
const SAMPLE_SIZE: usize = 1024;
@@ -322,63 +322,78 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
}
}
- fn serialize_inner(&self, serialize_items: fn(&[T]) -> Vec<u8>) -> Vec<u8>
+ fn serialize_inner(
+ &self,
+ count_serialize_size: CountSerializeSize<T>,
+ serialize_items: SerializeItems<T>,
+ ) -> Vec<u8>
where
- T: Clone,
+ T: Clone, // for self.hash_map.active_keys()
{
if self.is_empty() {
- let mut out = vec![0u8; 8];
- out[PREAMBLE_LONGS_BYTE] = PREAMBLE_LONGS_EMPTY;
- out[SER_VER_BYTE] = SER_VER;
- out[FAMILY_BYTE] = FAMILY_ID;
- out[LG_MAX_MAP_SIZE_BYTE] = self.lg_max_map_size;
- out[LG_CUR_MAP_SIZE_BYTE] = self.hash_map.lg_length();
- out[FLAGS_BYTE] = EMPTY_FLAG_MASK;
- return out;
+ let mut bytes = SketchBytes::with_capacity(8);
+ bytes.write_u8(PREAMBLE_LONGS_EMPTY);
+ bytes.write_u8(SERIAL_VERSION);
+ bytes.write_u8(FAMILY_ID);
+ bytes.write_u8(self.lg_max_map_size);
+ bytes.write_u8(self.hash_map.lg_length());
+ bytes.write_u8(EMPTY_FLAG_MASK);
+ return bytes.into_bytes();
}
+
let active_items = self.num_active_items();
let values = self.hash_map.active_values();
let keys = self.hash_map.active_keys();
- let items_bytes = serialize_items(&keys);
let total_bytes =
- PREAMBLE_LONGS_NONEMPTY as usize * 8 + (active_items * 8) +
items_bytes.len();
- let mut out = vec![0u8; total_bytes];
- out[PREAMBLE_LONGS_BYTE] = PREAMBLE_LONGS_NONEMPTY;
- out[SER_VER_BYTE] = SER_VER;
- out[FAMILY_BYTE] = FAMILY_ID;
- out[LG_MAX_MAP_SIZE_BYTE] = self.lg_max_map_size;
- out[LG_CUR_MAP_SIZE_BYTE] = self.hash_map.lg_length();
- out[FLAGS_BYTE] = 0;
- write_u32_le(&mut out, ACTIVE_ITEMS_INT, active_items as u32);
- write_u64_le(&mut out, STREAM_WEIGHT_LONG, self.stream_weight);
- write_u64_le(&mut out, OFFSET_LONG, self.offset);
-
- let mut offset = PREAMBLE_LONGS_NONEMPTY as usize * 8;
+ PREAMBLE_LONGS_NONEMPTY as usize * 8 + (active_items * 8) +
count_serialize_size(&keys);
+
+ let mut bytes = SketchBytes::with_capacity(total_bytes);
+ bytes.write_u8(PREAMBLE_LONGS_NONEMPTY);
+ bytes.write_u8(SERIAL_VERSION);
+ bytes.write_u8(FAMILY_ID);
+ bytes.write_u8(self.lg_max_map_size);
+ bytes.write_u8(self.hash_map.lg_length());
+ bytes.write_u8(0); // flags
+ bytes.write_u16_le(0); // unused
+
+ bytes.write_u32_le(active_items as u32);
+ bytes.write_u32_le(0); // unused
+ bytes.write_u64_le(self.stream_weight);
+ bytes.write_u64_le(self.offset);
+
for value in values {
- write_u64_le(&mut out, offset, value);
- offset += 8;
+ bytes.write_u64_le(value);
}
- out[offset..offset + items_bytes.len()].copy_from_slice(&items_bytes);
- out
+ serialize_items(&mut bytes, &keys);
+
+ bytes.into_bytes()
}
fn deserialize_inner(
bytes: &[u8],
deserialize_items: DeserializeItems<T>,
) -> Result<Self, Error> {
- if bytes.len() < 8 {
- return Err(Error::insufficient_data("preamble"));
+ fn make_error(tag: &'static str) -> impl FnOnce(std::io::Error) ->
Error {
+ move |_| Error::insufficient_data(tag)
}
- let pre_longs = bytes[PREAMBLE_LONGS_BYTE] & 0x3f;
- let ser_ver = bytes[SER_VER_BYTE];
- let family = bytes[FAMILY_BYTE];
- let lg_max = bytes[LG_MAX_MAP_SIZE_BYTE];
- let lg_cur = bytes[LG_CUR_MAP_SIZE_BYTE];
- let flags = bytes[FLAGS_BYTE];
- let is_empty = (flags & EMPTY_FLAG_MASK) != 0;
- if ser_ver != SER_VER {
- return Err(Error::unsupported_serial_version(SER_VER, ser_ver));
+
+ let mut cursor = SketchSlice::new(bytes);
+ let pre_longs = cursor.read_u8().map_err(make_error("pre_longs"))?;
+ let pre_longs = pre_longs & 0x3F;
+ let serial_version =
cursor.read_u8().map_err(make_error("serial_version"))?;
+ let family = cursor.read_u8().map_err(make_error("family"))?;
+ let lg_max = cursor.read_u8().map_err(make_error("lg_max_map_size"))?;
+ let lg_cur = cursor.read_u8().map_err(make_error("lg_cur_map_size"))?;
+ let flags = cursor.read_u8().map_err(make_error("flags"))?;
+ cursor.read_u16_le().map_err(make_error("<unused>"))?;
+
+ if serial_version != SERIAL_VERSION {
+ return Err(Error::unsupported_serial_version(
+ SERIAL_VERSION,
+ serial_version,
+ ));
}
+
if family != FAMILY_ID {
return Err(Error::invalid_family(
FAMILY_ID,
@@ -386,51 +401,52 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
"FrequentItemsSketch",
));
}
+
if lg_cur > lg_max {
return Err(Error::deserial("lg_cur_map_size exceeds
lg_max_map_size"));
}
+
+ let is_empty = (flags & EMPTY_FLAG_MASK) != 0;
if is_empty {
- if pre_longs != PREAMBLE_LONGS_EMPTY {
- return Err(Error::invalid_preamble_longs(
+ return if pre_longs != PREAMBLE_LONGS_EMPTY {
+ Err(Error::invalid_preamble_longs(
PREAMBLE_LONGS_EMPTY,
pre_longs,
- ));
- }
- return Ok(Self::with_lg_map_sizes(lg_max, lg_cur));
+ ))
+ } else {
+ Ok(Self::with_lg_map_sizes(lg_max, lg_cur))
+ };
}
+
if pre_longs != PREAMBLE_LONGS_NONEMPTY {
return Err(Error::invalid_preamble_longs(
PREAMBLE_LONGS_NONEMPTY,
pre_longs,
));
}
- if bytes.len() < PREAMBLE_LONGS_NONEMPTY as usize * 8 {
- return Err(Error::insufficient_data("full preamble"));
- }
- let active_items = read_u32_le(bytes, ACTIVE_ITEMS_INT) as usize;
- let stream_weight = read_u64_le(bytes, STREAM_WEIGHT_LONG);
- let offset_val = read_u64_le(bytes, OFFSET_LONG);
- let values_offset = PREAMBLE_LONGS_NONEMPTY as usize * 8;
- let values_bytes = active_items
- .checked_mul(8)
- .ok_or_else(|| Error::deserial("values size overflow"))?;
- let items_offset = values_offset + values_bytes;
- if bytes.len() < items_offset {
- return Err(Error::insufficient_data("values"));
- }
+
+ let active_items =
cursor.read_u32_le().map_err(make_error("active_items"))?;
+ let active_items = active_items as usize;
+ cursor.read_u32_le().map_err(make_error("<unused>"))?;
+ let stream_weight =
cursor.read_u64_le().map_err(make_error("stream_weight"))?;
+ let offset_val = cursor.read_u64_le().map_err(make_error("offset"))?;
+
let mut values = Vec::with_capacity(active_items);
for i in 0..active_items {
- values.push(read_u64_le(bytes, values_offset + i * 8));
+ values.push(cursor.read_u64_le().map_err(|_| {
+ Error::insufficient_data(format!(
+ "expected {active_items} weights, failed at index {i}"
+ ))
+ })?);
}
- let (items, consumed) = deserialize_items(&bytes[items_offset..],
active_items)?;
+
+ let items = deserialize_items(cursor, active_items)?;
if items.len() != active_items {
return Err(Error::deserial(
"item count mismatch during deserialization",
));
}
- if consumed > bytes.len() - items_offset {
- return Err(Error::insufficient_data("items"));
- }
+
let mut sketch = Self::with_lg_map_sizes(lg_max, lg_cur);
for (item, value) in items.into_iter().zip(values) {
sketch.update_with_count(item, value);
@@ -444,7 +460,7 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
impl FrequentItemsSketch<i64> {
/// Serializes this sketch into a byte vector.
pub fn serialize(&self) -> Vec<u8> {
- self.serialize_inner(serialize_i64_items)
+ self.serialize_inner(count_i64_items_bytes, serialize_i64_items)
}
/// Deserializes a sketch from bytes.
@@ -456,7 +472,7 @@ impl FrequentItemsSketch<i64> {
impl FrequentItemsSketch<String> {
/// Serializes this sketch into a byte vector.
pub fn serialize(&self) -> Vec<u8> {
- self.serialize_inner(serialize_string_items)
+ self.serialize_inner(count_string_items_bytes, serialize_string_items)
}
/// Deserializes a sketch from bytes.
diff --git a/datasketches/src/hll/array4.rs b/datasketches/src/hll/array4.rs
index 3944a41..5bf809f 100644
--- a/datasketches/src/hll/array4.rs
+++ b/datasketches/src/hll/array4.rs
@@ -375,7 +375,7 @@ impl Array4 {
// Write standard header
bytes.write_u8(HLL_PREINTS);
- bytes.write_u8(SERIAL_VER);
+ bytes.write_u8(SERIAL_VERSION);
bytes.write_u8(HLL_FAMILY_ID);
bytes.write_u8(lg_config_k);
bytes.write_u8(0); // unused for HLL mode
diff --git a/datasketches/src/hll/array6.rs b/datasketches/src/hll/array6.rs
index f247a67..223424d 100644
--- a/datasketches/src/hll/array6.rs
+++ b/datasketches/src/hll/array6.rs
@@ -228,7 +228,7 @@ impl Array6 {
// Write standard header
bytes.write_u8(HLL_PREINTS);
- bytes.write_u8(SERIAL_VER);
+ bytes.write_u8(SERIAL_VERSION);
bytes.write_u8(HLL_FAMILY_ID);
bytes.write_u8(lg_config_k);
bytes.write_u8(0); // unused for HLL mode
diff --git a/datasketches/src/hll/array8.rs b/datasketches/src/hll/array8.rs
index 3ac1f0c..9f0f849 100644
--- a/datasketches/src/hll/array8.rs
+++ b/datasketches/src/hll/array8.rs
@@ -300,7 +300,7 @@ impl Array8 {
// Write standard header
bytes.write_u8(HLL_PREINTS);
- bytes.write_u8(SERIAL_VER);
+ bytes.write_u8(SERIAL_VERSION);
bytes.write_u8(HLL_FAMILY_ID);
bytes.write_u8(lg_config_k);
bytes.write_u8(0); // unused for HLL mode
diff --git a/datasketches/src/hll/hash_set.rs b/datasketches/src/hll/hash_set.rs
index 1a31031..874d3a4 100644
--- a/datasketches/src/hll/hash_set.rs
+++ b/datasketches/src/hll/hash_set.rs
@@ -148,7 +148,7 @@ impl HashSet {
// Write preamble
bytes.write_u8(HASH_SET_PREINTS);
- bytes.write_u8(SERIAL_VER);
+ bytes.write_u8(SERIAL_VERSION);
bytes.write_u8(HLL_FAMILY_ID);
bytes.write_u8(lg_config_k);
bytes.write_u8(lg_arr as u8);
diff --git a/datasketches/src/hll/list.rs b/datasketches/src/hll/list.rs
index 2fa9173..1abf699 100644
--- a/datasketches/src/hll/list.rs
+++ b/datasketches/src/hll/list.rs
@@ -110,7 +110,7 @@ impl List {
// Write preamble
bytes.write_u8(LIST_PREINTS);
- bytes.write_u8(SERIAL_VER);
+ bytes.write_u8(SERIAL_VERSION);
bytes.write_u8(HLL_FAMILY_ID);
bytes.write_u8(lg_config_k);
bytes.write_u8(lg_arr as u8);
diff --git a/datasketches/src/hll/serialization.rs
b/datasketches/src/hll/serialization.rs
index 30f034f..5fdb2b3 100644
--- a/datasketches/src/hll/serialization.rs
+++ b/datasketches/src/hll/serialization.rs
@@ -24,7 +24,7 @@
pub const HLL_FAMILY_ID: u8 = 7;
/// Current serialization version
-pub const SERIAL_VER: u8 = 1;
+pub const SERIAL_VERSION: u8 = 1;
/// Flag indicating sketch is empty (no values inserted)
pub const EMPTY_FLAG_MASK: u8 = 4;
diff --git a/datasketches/src/hll/sketch.rs b/datasketches/src/hll/sketch.rs
index ef76647..64626cd 100644
--- a/datasketches/src/hll/sketch.rs
+++ b/datasketches/src/hll/sketch.rs
@@ -241,9 +241,9 @@ impl HllSketch {
}
// Verify serialization version
- if serial_version != SERIAL_VER {
+ if serial_version != SERIAL_VERSION {
return Err(Error::unsupported_serial_version(
- SERIAL_VER,
+ SERIAL_VERSION,
serial_version,
));
}
diff --git a/datasketches/src/tdigest/sketch.rs
b/datasketches/src/tdigest/sketch.rs
index c9ec382..ddf440f 100644
--- a/datasketches/src/tdigest/sketch.rs
+++ b/datasketches/src/tdigest/sketch.rs
@@ -19,6 +19,7 @@ use std::cmp::Ordering;
use std::convert::identity;
use std::num::NonZeroU64;
+use crate::codec::SketchBytes;
use crate::codec::SketchSlice;
use crate::error::Error;
use crate::error::ErrorKind;
@@ -316,16 +317,16 @@ impl TDigestMut {
total_size += self.centroids.len() * (size_of::<f64>() +
size_of::<u64>());
}
- let mut bytes = Vec::with_capacity(total_size);
- bytes.push(match self.total_weight() {
+ let mut bytes = SketchBytes::with_capacity(total_size);
+ bytes.write_u8(match self.total_weight() {
0 => PREAMBLE_LONGS_EMPTY_OR_SINGLE,
1 => PREAMBLE_LONGS_EMPTY_OR_SINGLE,
_ => PREAMBLE_LONGS_MULTIPLE,
});
- bytes.push(SERIAL_VERSION);
- bytes.push(TDIGEST_FAMILY_ID);
- bytes.extend_from_slice(&self.k.to_le_bytes());
- bytes.push({
+ bytes.write_u8(SERIAL_VERSION);
+ bytes.write_u8(TDIGEST_FAMILY_ID);
+ bytes.write_u16_le(self.k);
+ bytes.write_u8({
let mut flags = 0;
if self.is_empty() {
flags |= FLAGS_IS_EMPTY;
@@ -338,23 +339,23 @@ impl TDigestMut {
}
flags
});
- bytes.extend_from_slice(&0u16.to_le_bytes()); // unused
+ bytes.write_u16_le(0); // unused
if self.is_empty() {
- return bytes;
+ return bytes.into_bytes();
}
if self.is_single_value() {
- bytes.extend_from_slice(&self.min.to_le_bytes());
- return bytes;
+ bytes.write_f64_le(self.min);
+ return bytes.into_bytes();
}
- bytes.extend_from_slice(&(self.centroids.len() as u32).to_le_bytes());
- bytes.extend_from_slice(&0u32.to_le_bytes()); // unused
- bytes.extend_from_slice(&self.min.to_le_bytes());
- bytes.extend_from_slice(&self.max.to_le_bytes());
+ bytes.write_u32_le(self.centroids.len() as u32);
+ bytes.write_u32_le(0); // unused
+ bytes.write_f64_le(self.min);
+ bytes.write_f64_le(self.max);
for centroid in &self.centroids {
- bytes.extend_from_slice(¢roid.mean.to_le_bytes());
- bytes.extend_from_slice(¢roid.weight.get().to_le_bytes());
+ bytes.write_f64_le(centroid.mean);
+ bytes.write_u64_le(centroid.weight.get());
}
- bytes
+ bytes.into_bytes()
}
/// Deserializes a TDigest from bytes.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]