This is an automated email from the ASF dual-hosted git repository.

tison pushed a commit to branch frequent-items
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git


The following commit(s) were added to refs/heads/frequent-items by this push:
     new 316322e  reintroduce abstraction
316322e is described below

commit 316322e3d9c4ec85f85cd1e29e0a90d734d21989
Author: tison <[email protected]>
AuthorDate: Sun Feb 1 15:27:51 2026 +0800

    reintroduce abstraction
    
    Signed-off-by: tison <[email protected]>
---
 datasketches/src/frequencies/serialization.rs | 87 ++++++++++++------------
 datasketches/src/frequencies/sketch.rs        | 96 +++++++++++++++++++++++++--
 2 files changed, 135 insertions(+), 48 deletions(-)

diff --git a/datasketches/src/frequencies/serialization.rs b/datasketches/src/frequencies/serialization.rs
index 3f8600b..447c706 100644
--- a/datasketches/src/frequencies/serialization.rs
+++ b/datasketches/src/frequencies/serialization.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::hash::Hash;
+
 use crate::codec::SketchBytes;
 use crate::codec::SketchSlice;
 use crate::error::Error;
@@ -32,66 +34,63 @@ pub const PREAMBLE_LONGS_NONEMPTY: u8 = 4;
 /// Empty flag mask (both bits for compatibility).
 pub const EMPTY_FLAG_MASK: u8 = 5;
 
-pub(crate) fn count_string_items_bytes(items: &[String]) -> usize {
-    items.iter().map(|item| 4 + item.len()).sum()
+/// Trait for serializing and deserializing frequent item values.
+pub trait FrequentItemValue: Sized + Eq + Hash + Clone {
+    /// Returns the size in bytes required to serialize the given item.
+    fn serialize_size(item: &Self) -> usize;
+    /// Serializes the item into the given byte buffer.
+    fn serialize_value(&self, bytes: &mut SketchBytes);
+    /// Deserializes an item from the given byte cursor.
+    fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error>;
 }
 
-pub(crate) fn serialize_string_items(bytes: &mut SketchBytes, items: &[String]) {
-    for item in items {
-        let bs = item.as_bytes();
+impl FrequentItemValue for String {
+    fn serialize_size(item: &Self) -> usize {
+        size_of::<u32>() + item.len()
+    }
+
+    fn serialize_value(&self, bytes: &mut SketchBytes) {
+        let bs = self.as_bytes();
         bytes.write_u32_le(bs.len() as u32);
         bytes.write(bs);
     }
-}
 
-pub(crate) fn deserialize_string_items(
-    mut cursor: SketchSlice<'_>,
-    num_items: usize,
-) -> Result<Vec<String>, Error> {
-    let mut items = Vec::with_capacity(num_items);
-    for i in 0..num_items {
+    fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error> {
         let len = cursor.read_u32_le().map_err(|_| {
-            Error::insufficient_data(format!(
-                "expected {num_items} string items, failed to read len at index {i}"
-            ))
+            Error::insufficient_data("failed to read string item length".to_string())
         })?;
 
         let mut slice = vec![0; len as usize];
         cursor.read_exact(&mut slice).map_err(|_| {
-            Error::insufficient_data(format!(
-                "expected {num_items} string items, failed to read slice at index {i}"
-            ))
+            Error::insufficient_data("failed to read string item bytes".to_string())
         })?;
 
-        let value = String::from_utf8(slice)
-            .map_err(|_| Error::deserial(format!("invalid UTF-8 string payload at index {i}")))?;
-        items.push(value);
+        String::from_utf8(slice)
+            .map_err(|_| Error::deserial("invalid UTF-8 string payload".to_string()))
     }
-    Ok(items)
 }
 
-pub(crate) fn count_i64_items_bytes(items: &[i64]) -> usize {
-    items.len() * 8
-}
+macro_rules! impl_primitive {
+    ($name:ty, $read:ident, $write:ident) => {
+        impl FrequentItemValue for $name {
+            fn serialize_size(_item: &Self) -> usize {
+                size_of::<$name>()
+            }
 
-pub(crate) fn serialize_i64_items(bytes: &mut SketchBytes, items: &[i64]) {
-    for item in items.iter().copied() {
-        bytes.write_i64_le(item);
-    }
-}
+            fn serialize_value(&self, bytes: &mut SketchBytes) {
+                bytes.$write(*self);
+            }
 
-pub(crate) fn deserialize_i64_items(
-    mut cursor: SketchSlice<'_>,
-    num_items: usize,
-) -> Result<Vec<i64>, Error> {
-    let mut items = Vec::with_capacity(num_items);
-    for i in 0..num_items {
-        let value = cursor.read_i64_le().map_err(|_| {
-            Error::insufficient_data(format!(
-                "expected {num_items} i64 items, failed at index {i}"
-            ))
-        })?;
-        items.push(value);
-    }
-    Ok(items)
+            fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error> {
+                cursor.$read().map_err(|_| {
+                    Error::insufficient_data(
+                        concat!("failed to read ", stringify!($name), " item bytes").to_string(),
+                    )
+                })
+            }
+        }
+    };
 }
+
+impl_primitive!(i64, read_i64_le, write_i64_le);
+impl_primitive!(u64, read_u64_le, write_u64_le);
diff --git a/datasketches/src/frequencies/sketch.rs b/datasketches/src/frequencies/sketch.rs
index 9ee44b6..8b9784a 100644
--- a/datasketches/src/frequencies/sketch.rs
+++ b/datasketches/src/frequencies/sketch.rs
@@ -546,7 +546,14 @@ impl FrequentItemsSketch<i64> {
     /// assert!(decoded.estimate(&7) >= 2);
     /// ```
     pub fn serialize(&self) -> Vec<u8> {
-        self.serialize_inner(count_i64_items_bytes, serialize_i64_items)
+        self.serialize_inner(
+            |items| items.iter().map(i64::serialize_size).sum(),
+            |bytes, items| {
+                for item in items {
+                    item.serialize_value(bytes);
+                }
+            },
+        )
     }
 
     /// Deserializes a sketch from bytes.
@@ -562,7 +569,70 @@ impl FrequentItemsSketch<i64> {
     /// assert!(decoded.estimate(&7) >= 2);
     /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
-        Self::deserialize_inner(bytes, deserialize_i64_items)
+        Self::deserialize_inner(bytes, |mut cursor, num_items| {
+            let mut items = Vec::with_capacity(num_items);
+            for i in 0..num_items {
+                let item = i64::deserialize_value(&mut cursor).map_err(|_| {
+                    Error::insufficient_data(format!(
+                        "expected {num_items} items, failed to read item at index {i}"
+                    ))
+                })?;
+                items.push(item);
+            }
+            Ok(items)
+        })
+    }
+}
+
+impl FrequentItemsSketch<u64> {
+    /// Serializes this sketch into a byte vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// # sketch.update_with_count(7, 2);
+    /// let bytes = sketch.serialize();
+    /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate(&7) >= 2);
+    /// ```
+    pub fn serialize(&self) -> Vec<u8> {
+        self.serialize_inner(
+            |items| items.iter().map(u64::serialize_size).sum(),
+            |bytes, items| {
+                for item in items {
+                    item.serialize_value(bytes);
+                }
+            },
+        )
+    }
+
+    /// Deserializes a sketch from bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// # let mut sketch = FrequentItemsSketch::<u64>::new(64);
+    /// # sketch.update_with_count(7, 2);
+    /// # let bytes = sketch.serialize();
+    /// let decoded = FrequentItemsSketch::<u64>::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate(&7) >= 2);
+    /// ```
+    pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
+        Self::deserialize_inner(bytes, |mut cursor, num_items| {
+            let mut items = Vec::with_capacity(num_items);
+            for i in 0..num_items {
+                let item = u64::deserialize_value(&mut cursor).map_err(|_| {
+                    Error::insufficient_data(format!(
+                        "expected {num_items} items, failed to read item at index {i}"
+                    ))
+                })?;
+                items.push(item);
+            }
+            Ok(items)
+        })
     }
 }
 
@@ -581,7 +651,14 @@ impl FrequentItemsSketch<String> {
     /// assert!(decoded.estimate(&apple) >= 2);
     /// ```
     pub fn serialize(&self) -> Vec<u8> {
-        self.serialize_inner(count_string_items_bytes, serialize_string_items)
+        self.serialize_inner(
+            |items| items.iter().map(String::serialize_size).sum(),
+            |bytes, items| {
+                for item in items {
+                    item.serialize_value(bytes);
+                }
+            },
+        )
     }
 
     /// Deserializes a sketch from bytes.
@@ -598,6 +675,17 @@ impl FrequentItemsSketch<String> {
     /// assert!(decoded.estimate(&apple) >= 2);
     /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
-        Self::deserialize_inner(bytes, deserialize_string_items)
+        Self::deserialize_inner(bytes, |mut cursor, num_items| {
+            let mut items = Vec::with_capacity(num_items);
+            for i in 0..num_items {
+                let item = String::deserialize_value(&mut cursor).map_err(|_| {
+                    Error::insufficient_data(format!(
+                        "expected {num_items} items, failed to read item at index {i}"
+                    ))
+                })?;
+                items.push(item);
+            }
+            Ok(items)
+        })
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to