This is an automated email from the ASF dual-hosted git repository.

tison pushed a commit to branch frequent-items
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git

commit 9044acbaae2dd547935282d5e5837b1a0847b45a
Author: tison <[email protected]>
AuthorDate: Sun Feb 1 15:20:57 2026 +0800

    Revert "refactor: support customized FrequentItemValue"
    
    This reverts commit 98082a54c644796b6752860059f0a3c12f88d069.
---
 datasketches/src/codec.rs                     |  6 +-
 datasketches/src/frequencies/mod.rs           |  1 -
 datasketches/src/frequencies/serialization.rs | 92 +++++++++++++--------------
 datasketches/src/frequencies/sketch.rs        | 79 +++++++++++------------
 datasketches/src/lib.rs                       |  2 +-
 datasketches/tests/frequencies_update_test.rs |  4 +-
 6 files changed, 87 insertions(+), 97 deletions(-)

diff --git a/datasketches/src/codec.rs b/datasketches/src/codec.rs
index 7a13534..4df7b22 100644
--- a/datasketches/src/codec.rs
+++ b/datasketches/src/codec.rs
@@ -15,11 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#![allow(dead_code)]
+
 use std::io;
 use std::io::Cursor;
 use std::io::Read;
 
-pub struct SketchBytes {
+pub(crate) struct SketchBytes {
     bytes: Vec<u8>,
 }
 
@@ -111,7 +113,7 @@ impl SketchBytes {
     }
 }
 
-pub struct SketchSlice<'a> {
+pub(crate) struct SketchSlice<'a> {
     slice: Cursor<&'a [u8]>,
 }
 
diff --git a/datasketches/src/frequencies/mod.rs 
b/datasketches/src/frequencies/mod.rs
index 8f865d6..93fb5e4 100644
--- a/datasketches/src/frequencies/mod.rs
+++ b/datasketches/src/frequencies/mod.rs
@@ -52,7 +52,6 @@ mod reverse_purge_item_hash_map;
 mod serialization;
 mod sketch;
 
-pub use self::serialization::FrequentItemValue;
 pub use self::sketch::ErrorType;
 pub use self::sketch::FrequentItemsSketch;
 pub use self::sketch::Row;
diff --git a/datasketches/src/frequencies/serialization.rs 
b/datasketches/src/frequencies/serialization.rs
index c02cdef..3f8600b 100644
--- a/datasketches/src/frequencies/serialization.rs
+++ b/datasketches/src/frequencies/serialization.rs
@@ -18,7 +18,6 @@
 use crate::codec::SketchBytes;
 use crate::codec::SketchSlice;
 use crate::error::Error;
-use std::hash::Hash;
 
 /// Family ID for frequency sketches.
 pub const FAMILY_ID: u8 = 10;
@@ -33,69 +32,66 @@ pub const PREAMBLE_LONGS_NONEMPTY: u8 = 4;
 /// Empty flag mask (both bits for compatibility).
 pub const EMPTY_FLAG_MASK: u8 = 5;
 
-/// Trait for serializing and deserializing frequent item values.
-pub trait FrequentItemValue: Sized + Eq + Hash + Clone {
-    /// Returns the size in bytes required to serialize the given item.
-    fn serialize_size(item: &Self) -> usize;
-    /// Serializes the item into the given byte buffer.
-    fn serialize_value(&self, bytes: &mut SketchBytes);
-    /// Deserializes an item from the given byte cursor.
-    fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error>;
+pub(crate) fn count_string_items_bytes(items: &[String]) -> usize {
+    items.iter().map(|item| 4 + item.len()).sum()
 }
 
-impl FrequentItemValue for String {
-    fn serialize_size(item: &Self) -> usize {
-        size_of::<u32>() + item.len()
-    }
-
-    fn serialize_value(&self, bytes: &mut SketchBytes) {
-        let bs = self.as_bytes();
+pub(crate) fn serialize_string_items(bytes: &mut SketchBytes, items: 
&[String]) {
+    for item in items {
+        let bs = item.as_bytes();
         bytes.write_u32_le(bs.len() as u32);
         bytes.write(bs);
     }
+}
 
-    fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, Error> {
+pub(crate) fn deserialize_string_items(
+    mut cursor: SketchSlice<'_>,
+    num_items: usize,
+) -> Result<Vec<String>, Error> {
+    let mut items = Vec::with_capacity(num_items);
+    for i in 0..num_items {
         let len = cursor.read_u32_le().map_err(|_| {
-            Error::insufficient_data("failed to read string item 
length".to_string())
+            Error::insufficient_data(format!(
+                "expected {num_items} string items, failed to read len at 
index {i}"
+            ))
         })?;
 
         let mut slice = vec![0; len as usize];
         cursor.read_exact(&mut slice).map_err(|_| {
-            Error::insufficient_data("failed to read string item 
bytes".to_string())
+            Error::insufficient_data(format!(
+                "expected {num_items} string items, failed to read slice at 
index {i}"
+            ))
         })?;
 
-        String::from_utf8(slice)
-            .map_err(|_| Error::deserial("invalid UTF-8 string 
payload".to_string()))
+        let value = String::from_utf8(slice)
+            .map_err(|_| Error::deserial(format!("invalid UTF-8 string payload 
at index {i}")))?;
+        items.push(value);
     }
+    Ok(items)
 }
 
-macro_rules! impl_primitive {
-    ($name:ty, $read:ident, $write:ident) => {
-        impl FrequentItemValue for $name {
-            fn serialize_size(_item: &Self) -> usize {
-                size_of::<$name>()
-            }
-
-            fn serialize_value(&self, bytes: &mut SketchBytes) {
-                bytes.$write(*self);
-            }
+pub(crate) fn count_i64_items_bytes(items: &[i64]) -> usize {
+    items.len() * 8
+}
 
-            fn deserialize_value(cursor: &mut SketchSlice<'_>) -> Result<Self, 
Error> {
-                cursor.$read().map_err(|_| {
-                    Error::insufficient_data(
-                        concat!("failed to read ", stringify!($name), " item 
bytes").to_string(),
-                    )
-                })
-            }
-        }
-    };
+pub(crate) fn serialize_i64_items(bytes: &mut SketchBytes, items: &[i64]) {
+    for item in items.iter().copied() {
+        bytes.write_i64_le(item);
+    }
 }
 
-impl_primitive!(i8, read_i8, write_i8);
-impl_primitive!(u8, read_u8, write_u8);
-impl_primitive!(i16, read_i16_le, write_i16_le);
-impl_primitive!(u16, read_u16_le, write_u16_le);
-impl_primitive!(i32, read_i32_le, write_i32_le);
-impl_primitive!(u32, read_u32_le, write_u32_le);
-impl_primitive!(i64, read_i64_le, write_i64_le);
-impl_primitive!(u64, read_u64_le, write_u64_le);
+pub(crate) fn deserialize_i64_items(
+    mut cursor: SketchSlice<'_>,
+    num_items: usize,
+) -> Result<Vec<i64>, Error> {
+    let mut items = Vec::with_capacity(num_items);
+    for i in 0..num_items {
+        let value = cursor.read_i64_le().map_err(|_| {
+            Error::insufficient_data(format!(
+                "expected {num_items} i64 items, failed at index {i}"
+            ))
+        })?;
+        items.push(value);
+    }
+    Ok(items)
+}
diff --git a/datasketches/src/frequencies/sketch.rs 
b/datasketches/src/frequencies/sketch.rs
index 7bf5f65..e0d9711 100644
--- a/datasketches/src/frequencies/sketch.rs
+++ b/datasketches/src/frequencies/sketch.rs
@@ -66,12 +66,14 @@ impl<T> Row<T> {
         self.estimate
     }
 
-    /// Returns the guaranteed upper bound for the frequency.
+    /// Returns the upper bound for the frequency.
     pub fn upper_bound(&self) -> u64 {
         self.upper_bound
     }
 
     /// Returns the guaranteed lower bound for the frequency.
+    ///
+    /// This value is never negative.
     pub fn lower_bound(&self) -> u64 {
         self.lower_bound
     }
@@ -113,11 +115,7 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     /// assert_eq!(sketch.num_active_items(), 2);
     /// ```
     pub fn new(max_map_size: usize) -> Self {
-        assert!(
-            max_map_size.is_power_of_two(),
-            "max_map_size must be power of 2"
-        );
-        let lg_max_map_size = max_map_size.trailing_zeros() as u8;
+        let lg_max_map_size = exact_log2(max_map_size);
         Self::with_lg_map_sizes(lg_max_map_size, LG_MIN_MAP_SIZE)
     }
 
@@ -532,13 +530,11 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     }
 }
 
-impl<T: FrequentItemValue> FrequentItemsSketch<T> {
+impl FrequentItemsSketch<i64> {
     /// Serializes this sketch into a byte vector.
     ///
     /// # Examples
     ///
-    /// Use with `i64` items:
-    ///
     /// ```
     /// # use datasketches::frequencies::FrequentItemsSketch;
     /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
@@ -547,8 +543,31 @@ impl<T: FrequentItemValue> FrequentItemsSketch<T> {
     /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
     /// assert!(decoded.estimate(&7) >= 2);
     /// ```
+    pub fn serialize(&self) -> Vec<u8> {
+        self.serialize_inner(count_i64_items_bytes, serialize_i64_items)
+    }
+
+    /// Deserializes a sketch from bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// # sketch.update_with_count(7, 2);
+    /// # let bytes = sketch.serialize();
+    /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate(&7) >= 2);
+    /// ```
+    pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
+        Self::deserialize_inner(bytes, deserialize_i64_items)
+    }
+}
+
+impl FrequentItemsSketch<String> {
+    /// Serializes this sketch into a byte vector.
     ///
-    /// Use with `String` items:
+    /// # Examples
     ///
     /// ```
     /// # use datasketches::frequencies::FrequentItemsSketch;
@@ -560,33 +579,13 @@ impl<T: FrequentItemValue> FrequentItemsSketch<T> {
     /// assert!(decoded.estimate(&apple) >= 2);
     /// ```
     pub fn serialize(&self) -> Vec<u8> {
-        self.serialize_inner(
-            |items| items.iter().map(|item| T::serialize_size(item)).sum(),
-            |bytes, items| {
-                for item in items {
-                    item.serialize_value(bytes);
-                }
-            },
-        )
+        self.serialize_inner(count_string_items_bytes, serialize_string_items)
     }
 
     /// Deserializes a sketch from bytes.
     ///
     /// # Examples
     ///
-    /// Use with `i64` items:
-    ///
-    /// ```
-    /// # use datasketches::frequencies::FrequentItemsSketch;
-    /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
-    /// # sketch.update_with_count(7, 2);
-    /// # let bytes = sketch.serialize();
-    /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
-    /// assert!(decoded.estimate(&7) >= 2);
-    /// ```
-    ///
-    /// Use with `String` items:
-    ///
     /// ```
     /// # use datasketches::frequencies::FrequentItemsSketch;
     /// # let mut sketch = FrequentItemsSketch::<String>::new(64);
@@ -597,17 +596,11 @@ impl<T: FrequentItemValue> FrequentItemsSketch<T> {
     /// assert!(decoded.estimate(&apple) >= 2);
     /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
-        Self::deserialize_inner(bytes, |mut cursor, num_items| {
-            let mut items = Vec::with_capacity(num_items);
-            for i in 0..num_items {
-                let item = T::deserialize_value(&mut cursor).map_err(|_| {
-                    Error::insufficient_data(format!(
-                        "expected {num_items} items, failed to read item at 
index {i}"
-                    ))
-                })?;
-                items.push(item);
-            }
-            Ok(items)
-        })
+        Self::deserialize_inner(bytes, deserialize_string_items)
     }
 }
+
+fn exact_log2(value: usize) -> u8 {
+    assert!(value.is_power_of_two(), "value must be power of 2");
+    value.trailing_zeros() as u8
+}
diff --git a/datasketches/src/lib.rs b/datasketches/src/lib.rs
index 02dc692..17701ab 100644
--- a/datasketches/src/lib.rs
+++ b/datasketches/src/lib.rs
@@ -31,7 +31,6 @@
 compile_error!("datasketches does not support big-endian targets");
 
 pub mod bloom;
-pub mod codec;
 pub mod common;
 pub mod countmin;
 pub mod cpc;
@@ -41,4 +40,5 @@ pub mod hll;
 pub mod tdigest;
 pub mod theta;
 
+mod codec;
 mod hash;
diff --git a/datasketches/tests/frequencies_update_test.rs 
b/datasketches/tests/frequencies_update_test.rs
index a5a98e1..f2b0001 100644
--- a/datasketches/tests/frequencies_update_test.rs
+++ b/datasketches/tests/frequencies_update_test.rs
@@ -480,13 +480,13 @@ fn test_longs_reset() {
 }
 
 #[test]
-#[should_panic(expected = "max_map_size must be power of 2")]
+#[should_panic(expected = "value must be power of 2")]
 fn test_longs_invalid_map_size_panics() {
     FrequentItemsSketch::<i64>::new(6);
 }
 
 #[test]
-#[should_panic(expected = "max_map_size must be power of 2")]
+#[should_panic(expected = "value must be power of 2")]
 fn test_items_invalid_map_size_panics() {
     let _ = FrequentItemsSketch::<String>::new(6);
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to