This is an automated email from the ASF dual-hosted git repository.
tison pushed a commit to branch frequent-items
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git
The following commit(s) were added to refs/heads/frequent-items by this push:
new 316322e reintroduce abstraction
316322e is described below
commit 316322e3d9c4ec85f85cd1e29e0a90d734d21989
Author: tison <[email protected]>
AuthorDate: Sun Feb 1 15:27:51 2026 +0800
reintroduce abstraction
Signed-off-by: tison <[email protected]>
---
datasketches/src/frequencies/serialization.rs | 87 ++++++++++++------------
datasketches/src/frequencies/sketch.rs | 96 +++++++++++++++++++++++++--
2 files changed, 135 insertions(+), 48 deletions(-)
diff --git a/datasketches/src/frequencies/serialization.rs b/datasketches/src/frequencies/serialization.rs
index 3f8600b..447c706 100644
--- a/datasketches/src/frequencies/serialization.rs
+++ b/datasketches/src/frequencies/serialization.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+use std::hash::Hash;
+
use crate::codec::SketchBytes;
use crate::codec::SketchSlice;
use crate::error::Error;
@@ -32,66 +34,63 @@ pub const PREAMBLE_LONGS_NONEMPTY: u8 = 4;
/// Empty flag mask (both bits for compatibility).
pub const EMPTY_FLAG_MASK: u8 = 5;
-pub(crate) fn count_string_items_bytes(items: &[String]) -> usize {
- items.iter().map(|item| 4 + item.len()).sum()
+/// Trait for serializing and deserializing frequent item values.
+pub trait FrequentItemValue: Sized + Eq + Hash + Clone {
+ /// Returns the size in bytes required to serialize the given item.
+ fn serialize_size(item: &Self) -> usize;
+ /// Serializes the item into the given byte buffer.
+ fn serialize_value(&self, bytes: &mut SketchBytes);
+ /// Deserializes an item from the given byte cursor.
+ fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error>;
}
-pub(crate) fn serialize_string_items(bytes: &mut SketchBytes, items: &[String]) {
- for item in items {
- let bs = item.as_bytes();
+impl FrequentItemValue for String {
+ fn serialize_size(item: &Self) -> usize {
+ size_of::<u32>() + item.len()
+ }
+
+ fn serialize_value(&self, bytes: &mut SketchBytes) {
+ let bs = self.as_bytes();
bytes.write_u32_le(bs.len() as u32);
bytes.write(bs);
}
-}
-pub(crate) fn deserialize_string_items(
- mut cursor: SketchSlice<'_>,
- num_items: usize,
-) -> Result<Vec<String>, Error> {
- let mut items = Vec::with_capacity(num_items);
- for i in 0..num_items {
+ fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error> {
let len = cursor.read_u32_le().map_err(|_| {
- Error::insufficient_data(format!(
- "expected {num_items} string items, failed to read len at index {i}"
- ))
+ Error::insufficient_data("failed to read string item length".to_string())
})?;
let mut slice = vec![0; len as usize];
cursor.read_exact(&mut slice).map_err(|_| {
- Error::insufficient_data(format!(
- "expected {num_items} string items, failed to read slice at index {i}"
- ))
+ Error::insufficient_data("failed to read string item bytes".to_string())
})?;
- let value = String::from_utf8(slice)
- .map_err(|_| Error::deserial(format!("invalid UTF-8 string payload at index {i}")))?;
- items.push(value);
+ String::from_utf8(slice)
+ .map_err(|_| Error::deserial("invalid UTF-8 string payload".to_string()))
}
- Ok(items)
}
-pub(crate) fn count_i64_items_bytes(items: &[i64]) -> usize {
- items.len() * 8
-}
+macro_rules! impl_primitive {
+ ($name:ty, $read:ident, $write:ident) => {
+ impl FrequentItemValue for $name {
+ fn serialize_size(_item: &Self) -> usize {
+ size_of::<$name>()
+ }
-pub(crate) fn serialize_i64_items(bytes: &mut SketchBytes, items: &[i64]) {
- for item in items.iter().copied() {
- bytes.write_i64_le(item);
- }
-}
+ fn serialize_value(&self, bytes: &mut SketchBytes) {
+ bytes.$write(*self);
+ }
-pub(crate) fn deserialize_i64_items(
- mut cursor: SketchSlice<'_>,
- num_items: usize,
-) -> Result<Vec<i64>, Error> {
- let mut items = Vec::with_capacity(num_items);
- for i in 0..num_items {
- let value = cursor.read_i64_le().map_err(|_| {
- Error::insufficient_data(format!(
- "expected {num_items} i64 items, failed at index {i}"
- ))
- })?;
- items.push(value);
- }
- Ok(items)
+ fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error> {
+ cursor.$read().map_err(|_| {
+ Error::insufficient_data(
+ concat!("failed to read ", stringify!($name), " item bytes").to_string(),
+ )
+ })
+ }
+ }
+ };
}
+
+impl_primitive!(i64, read_i64_le, write_i64_le);
+impl_primitive!(u64, read_u64_le, write_u64_le);
diff --git a/datasketches/src/frequencies/sketch.rs b/datasketches/src/frequencies/sketch.rs
index 9ee44b6..8b9784a 100644
--- a/datasketches/src/frequencies/sketch.rs
+++ b/datasketches/src/frequencies/sketch.rs
@@ -546,7 +546,14 @@ impl FrequentItemsSketch<i64> {
/// assert!(decoded.estimate(&7) >= 2);
/// ```
pub fn serialize(&self) -> Vec<u8> {
- self.serialize_inner(count_i64_items_bytes, serialize_i64_items)
+ self.serialize_inner(
+ |items| items.iter().map(i64::serialize_size).sum(),
+ |bytes, items| {
+ for item in items {
+ item.serialize_value(bytes);
+ }
+ },
+ )
}
/// Deserializes a sketch from bytes.
@@ -562,7 +569,70 @@ impl FrequentItemsSketch<i64> {
/// assert!(decoded.estimate(&7) >= 2);
/// ```
pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
- Self::deserialize_inner(bytes, deserialize_i64_items)
+ Self::deserialize_inner(bytes, |mut cursor, num_items| {
+ let mut items = Vec::with_capacity(num_items);
+ for i in 0..num_items {
+ let item = i64::deserialize_value(&mut cursor).map_err(|_| {
+ Error::insufficient_data(format!(
+ "expected {num_items} items, failed to read item at index {i}"
+ ))
+ })?;
+ items.push(item);
+ }
+ Ok(items)
+ })
+ }
+}
+
+impl FrequentItemsSketch<u64> {
+ /// Serializes this sketch into a byte vector.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use datasketches::frequencies::FrequentItemsSketch;
+ /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
+ /// # sketch.update_with_count(7, 2);
+ /// let bytes = sketch.serialize();
+ /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
+ /// assert!(decoded.estimate(&7) >= 2);
+ /// ```
+ pub fn serialize(&self) -> Vec<u8> {
+ self.serialize_inner(
+ |items| items.iter().map(u64::serialize_size).sum(),
+ |bytes, items| {
+ for item in items {
+ item.serialize_value(bytes);
+ }
+ },
+ )
+ }
+
+ /// Deserializes a sketch from bytes.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use datasketches::frequencies::FrequentItemsSketch;
+ /// # let mut sketch = FrequentItemsSketch::<u64>::new(64);
+ /// # sketch.update_with_count(7, 2);
+ /// # let bytes = sketch.serialize();
+ /// let decoded = FrequentItemsSketch::<u64>::deserialize(&bytes).unwrap();
+ /// assert!(decoded.estimate(&7) >= 2);
+ /// ```
+ pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
+ Self::deserialize_inner(bytes, |mut cursor, num_items| {
+ let mut items = Vec::with_capacity(num_items);
+ for i in 0..num_items {
+ let item = u64::deserialize_value(&mut cursor).map_err(|_| {
+ Error::insufficient_data(format!(
+ "expected {num_items} items, failed to read item at index {i}"
+ ))
+ })?;
+ items.push(item);
+ }
+ Ok(items)
+ })
}
}
@@ -581,7 +651,14 @@ impl FrequentItemsSketch<String> {
/// assert!(decoded.estimate(&apple) >= 2);
/// ```
pub fn serialize(&self) -> Vec<u8> {
- self.serialize_inner(count_string_items_bytes, serialize_string_items)
+ self.serialize_inner(
+ |items| items.iter().map(String::serialize_size).sum(),
+ |bytes, items| {
+ for item in items {
+ item.serialize_value(bytes);
+ }
+ },
+ )
}
/// Deserializes a sketch from bytes.
@@ -598,6 +675,17 @@ impl FrequentItemsSketch<String> {
/// assert!(decoded.estimate(&apple) >= 2);
/// ```
pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
- Self::deserialize_inner(bytes, deserialize_string_items)
+ Self::deserialize_inner(bytes, |mut cursor, num_items| {
+ let mut items = Vec::with_capacity(num_items);
+ for i in 0..num_items {
+ let item = String::deserialize_value(&mut cursor).map_err(|_| {
+ Error::insufficient_data(format!(
+ "expected {num_items} items, failed to read item at index {i}"
+ ))
+ })?;
+ items.push(item);
+ }
+ Ok(items)
+ })
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]