(datasketches-rust) branch main updated: feat: impl CpcSketch (#75)

tison Sat, 31 Jan 2026 19:19:09 -0800

This is an automated email from the ASF dual-hosted git repository.

tison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git



The following commit(s) were added to refs/heads/main by this push:
     new dd12abf  feat: impl CpcSketch (#75)
dd12abf is described below

commit dd12abfec488097ebcaef761042317f3e46bc0cf
Author: tison <[email protected]>
AuthorDate: Sun Feb 1 11:18:53 2026 +0800

    feat: impl CpcSketch (#75)
    
    Signed-off-by: tison <[email protected]>
---
 Cargo.lock                                |  91 +++++-
 Cargo.toml                                |   3 +
 datasketches/Cargo.toml                   |   1 +
 datasketches/src/common/inv_pow2_table.rs | 103 ++++++
 datasketches/src/common/mod.rs            |  14 +
 datasketches/src/common/num_std_dev.rs    |   5 +
 datasketches/src/cpc/estimator.rs         | 403 ++++++++++++++++++++++++
 datasketches/src/cpc/kxp_byte_lookup.rs   |  76 +++++
 datasketches/src/cpc/mod.rs               | 100 ++++++
 datasketches/src/cpc/pair_table.rs        | 346 +++++++++++++++++++++
 datasketches/src/cpc/sketch.rs            | 499 ++++++++++++++++++++++++++++++
 datasketches/src/cpc/union.rs             | 409 ++++++++++++++++++++++++
 datasketches/src/error.rs                 |   2 +-
 datasketches/src/hll/sketch.rs            |   2 +-
 datasketches/src/hll/union.rs             |   2 +-
 datasketches/src/lib.rs                   |   1 +
 datasketches/src/tdigest/sketch.rs        |  12 +-
 datasketches/src/theta/sketch.rs          |  22 +-
 datasketches/tests/cpc_union_test.rs      | 169 ++++++++++
 datasketches/tests/cpc_update_test.rs     |  62 ++++
 20 files changed, 2280 insertions(+), 42 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c946f1e..1bbd4f6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -81,9 +81,9 @@ checksum = 
"9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
 [[package]]
 name = "clap"
-version = "4.5.53"
+version = "4.5.55"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
+checksum = "3e34525d5bbbd55da2bb745d34b36121baac88d07619a9a09cfcf4a6c0832785"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -91,9 +91,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.53"
+version = "4.5.55"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
+checksum = "59a20016a20a3da95bef50ec7238dbd09baeef4311dcdd38ec15aba69812fb61"
 dependencies = [
  "anstream",
  "anstyle",
@@ -103,9 +103,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.49"
+version = "4.5.55"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
+checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -115,9 +115,9 @@ dependencies = [
 
 [[package]]
 name = "clap_lex"
-version = "0.7.6"
+version = "0.7.7"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
+checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
 
 [[package]]
 name = "colorchoice"
@@ -143,6 +143,7 @@ version = "0.2.0"
 dependencies = [
  "googletest",
  "insta",
+ "rand",
 ]
 
 [[package]]
@@ -241,9 +242,9 @@ checksum = 
"a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
 
 [[package]]
 name = "libc"
-version = "0.2.178"
+version = "0.2.180"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
+checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
 
 [[package]]
 name = "linux-raw-sys"
@@ -278,20 +279,29 @@ version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
 [[package]]
 name = "proc-macro2"
-version = "1.0.104"
+version = "1.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
 dependencies = [
  "unicode-ident",
 ]
 
 [[package]]
 name = "quote"
-version = "1.0.42"
+version = "1.0.44"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
 dependencies = [
  "proc-macro2",
 ]
@@ -302,6 +312,35 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
+[[package]]
+name = "rand"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom",
+]
+
 [[package]]
 name = "regex"
 version = "1.12.2"
@@ -364,9 +403,9 @@ checksum = 
"7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
 [[package]]
 name = "syn"
-version = "2.0.112"
+version = "2.0.114"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
+checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -525,3 +564,23 @@ dependencies = [
  "clap",
  "which",
 ]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.35"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "fdea86ddd5568519879b8187e1cf04e24fce28f7fe046ceecbce472ff19a2572"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.35"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "0c15e1b46eff7c6c91195752e0eeed8ef040e391cdece7c25376957d5f15df22"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
diff --git a/Cargo.toml b/Cargo.toml
index d387fbf..cc776fe 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,6 +35,7 @@ datasketches = { path = "datasketches" }
 clap = { version = "4.5.20", features = ["derive"] }
 insta = { version = "1.46.1" }
 googletest = { version = "0.14.2" }
+rand = { version = "0.9.2" }
 which = { version = "8.0.0" }
 
 [workspace.lints.rust]
@@ -44,4 +45,6 @@ unused_must_use = "deny"
 
 [workspace.lints.clippy]
 dbg_macro = "deny"
+
 too_many_arguments = "allow"
+needless_range_loop = "allow"
diff --git a/datasketches/Cargo.toml b/datasketches/Cargo.toml
index 214288e..88b224c 100644
--- a/datasketches/Cargo.toml
+++ b/datasketches/Cargo.toml
@@ -36,6 +36,7 @@ rustdoc-args = ["--cfg", "docsrs"]
 
 [dev-dependencies]
 googletest = { workspace = true }
+rand = { workspace = true }
 insta = { workspace = true }
 
 [lints]
diff --git a/datasketches/src/common/inv_pow2_table.rs 
b/datasketches/src/common/inv_pow2_table.rs
new file mode 100644
index 0000000..580a2fc
--- /dev/null
+++ b/datasketches/src/common/inv_pow2_table.rs
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[rustfmt::skip]
+#[allow(clippy::excessive_precision)]
+pub(crate) const INVERSE_POWERS_OF_2: [f64; 256] = [
+    1.0, 0.5, 0.25, 0.125,
+    0.0625, 0.03125, 0.015625, 0.0078125,
+    0.00390625, 0.001953125, 0.0009765625, 0.00048828125,
+    0.000244140625, 0.0001220703125, 6.103515625e-05, 3.0517578125e-05,
+    1.52587890625e-05, 7.62939453125e-06, 3.814697265625e-06, 
1.9073486328125e-06,
+    9.5367431640625e-07, 4.76837158203125e-07, 2.384185791015625e-07, 
1.1920928955078125e-07,
+    5.9604644775390625e-08, 2.9802322387695312e-08, 1.4901161193847656e-08, 
7.4505805969238281e-09,
+    3.7252902984619141e-09, 1.862645149230957e-09, 9.3132257461547852e-10, 
4.6566128730773926e-10,
+    2.3283064365386963e-10, 1.1641532182693481e-10, 5.8207660913467407e-11, 
2.9103830456733704e-11,
+    1.4551915228366852e-11, 7.2759576141834259e-12, 3.637978807091713e-12, 
1.8189894035458565e-12,
+    9.0949470177292824e-13, 4.5474735088646412e-13, 2.2737367544323206e-13, 
1.1368683772161603e-13,
+    5.6843418860808015e-14, 2.8421709430404007e-14, 1.4210854715202004e-14, 
7.1054273576010019e-15,
+    3.5527136788005009e-15, 1.7763568394002505e-15, 8.8817841970012523e-16, 
4.4408920985006262e-16,
+    2.2204460492503131e-16, 1.1102230246251565e-16, 5.5511151231257827e-17, 
2.7755575615628914e-17,
+    1.3877787807814457e-17, 6.9388939039072284e-18, 3.4694469519536142e-18, 
1.7347234759768071e-18,
+    8.6736173798840355e-19, 4.3368086899420177e-19, 2.1684043449710089e-19, 
1.0842021724855044e-19,
+    5.4210108624275222e-20, 2.7105054312137611e-20, 1.3552527156068805e-20, 
6.7762635780344027e-21,
+    3.3881317890172014e-21, 1.6940658945086007e-21, 8.4703294725430034e-22, 
4.2351647362715017e-22,
+    2.1175823681357508e-22, 1.0587911840678754e-22, 5.2939559203393771e-23, 
2.6469779601696886e-23,
+    1.3234889800848443e-23, 6.6174449004242214e-24, 3.3087224502121107e-24, 
1.6543612251060553e-24,
+    8.2718061255302767e-25, 4.1359030627651384e-25, 2.0679515313825692e-25, 
1.0339757656912846e-25,
+    5.169878828456423e-26, 2.5849394142282115e-26, 1.2924697071141057e-26, 
6.4623485355705287e-27,
+    3.2311742677852644e-27, 1.6155871338926322e-27, 8.0779356694631609e-28, 
4.0389678347315804e-28,
+    2.0194839173657902e-28, 1.0097419586828951e-28, 5.0487097934144756e-29, 
2.5243548967072378e-29,
+    1.2621774483536189e-29, 6.3108872417680944e-30, 3.1554436208840472e-30, 
1.5777218104420236e-30,
+    7.8886090522101181e-31, 3.944304526105059e-31, 1.9721522630525295e-31, 
9.8607613152626476e-32,
+    4.9303806576313238e-32, 2.4651903288156619e-32, 1.2325951644078309e-32, 
6.1629758220391547e-33,
+    3.0814879110195774e-33, 1.5407439555097887e-33, 7.7037197775489434e-34, 
3.8518598887744717e-34,
+    1.9259299443872359e-34, 9.6296497219361793e-35, 4.8148248609680896e-35, 
2.4074124304840448e-35,
+    1.2037062152420224e-35, 6.018531076210112e-36, 3.009265538105056e-36, 
1.504632769052528e-36,
+    7.5231638452626401e-37, 3.76158192263132e-37, 1.88079096131566e-37, 
9.4039548065783001e-38,
+    4.70197740328915e-38, 2.350988701644575e-38, 1.1754943508222875e-38, 
5.8774717541114375e-39,
+    2.9387358770557188e-39, 1.4693679385278594e-39, 7.3468396926392969e-40, 
3.6734198463196485e-40,
+    1.8367099231598242e-40, 9.1835496157991212e-41, 4.5917748078995606e-41, 
2.2958874039497803e-41,
+    1.1479437019748901e-41, 5.7397185098744507e-42, 2.8698592549372254e-42, 
1.4349296274686127e-42,
+    7.1746481373430634e-43, 3.5873240686715317e-43, 1.7936620343357659e-43, 
8.9683101716788293e-44,
+    4.4841550858394146e-44, 2.2420775429197073e-44, 1.1210387714598537e-44, 
5.6051938572992683e-45,
+    2.8025969286496341e-45, 1.4012984643248171e-45, 7.0064923216240854e-46, 
3.5032461608120427e-46,
+    1.7516230804060213e-46, 8.7581154020301067e-47, 4.3790577010150533e-47, 
2.1895288505075267e-47,
+    1.0947644252537633e-47, 5.4738221262688167e-48, 2.7369110631344083e-48, 
1.3684555315672042e-48,
+    6.8422776578360209e-49, 3.4211388289180104e-49, 1.7105694144590052e-49, 
8.5528470722950261e-50,
+    4.276423536147513e-50, 2.1382117680737565e-50, 1.0691058840368783e-50, 
5.3455294201843913e-51,
+    2.6727647100921956e-51, 1.3363823550460978e-51, 6.6819117752304891e-52, 
3.3409558876152446e-52,
+    1.6704779438076223e-52, 8.3523897190381114e-53, 4.1761948595190557e-53, 
2.0880974297595278e-53,
+    1.0440487148797639e-53, 5.2202435743988196e-54, 2.6101217871994098e-54, 
1.3050608935997049e-54,
+    6.5253044679985245e-55, 3.2626522339992623e-55, 1.6313261169996311e-55, 
8.1566305849981557e-56,
+    4.0783152924990778e-56, 2.0391576462495389e-56, 1.0195788231247695e-56, 
5.0978941156238473e-57,
+    2.5489470578119236e-57, 1.2744735289059618e-57, 6.3723676445298091e-58, 
3.1861838222649046e-58,
+    1.5930919111324523e-58, 7.9654595556622614e-59, 3.9827297778311307e-59, 
1.9913648889155653e-59,
+    9.9568244445778267e-60, 4.9784122222889134e-60, 2.4892061111444567e-60, 
1.2446030555722283e-60,
+    6.2230152778611417e-61, 3.1115076389305709e-61, 1.5557538194652854e-61, 
7.7787690973264271e-62,
+    3.8893845486632136e-62, 1.9446922743316068e-62, 9.7234613716580339e-63, 
4.861730685829017e-63,
+    2.4308653429145085e-63, 1.2154326714572542e-63, 6.0771633572862712e-64, 
3.0385816786431356e-64,
+    1.5192908393215678e-64, 7.596454196607839e-65, 3.7982270983039195e-65, 
1.8991135491519597e-65,
+    9.4955677457597987e-66, 4.7477838728798994e-66, 2.3738919364399497e-66, 
1.1869459682199748e-66,
+    5.9347298410998742e-67, 2.9673649205499371e-67, 1.4836824602749686e-67, 
7.4184123013748428e-68,
+    3.7092061506874214e-68, 1.8546030753437107e-68, 9.2730153767185535e-69, 
4.6365076883592767e-69,
+    2.3182538441796384e-69, 1.1591269220898192e-69, 5.7956346104490959e-70, 
2.897817305224548e-70,
+    1.448908652612274e-70, 7.2445432630613699e-71, 3.6222716315306849e-71, 
1.8111358157653425e-71,
+    9.0556790788267124e-72, 4.5278395394133562e-72, 2.2639197697066781e-72, 
1.131959884853339e-72,
+    5.6597994242666952e-73, 2.8298997121333476e-73, 1.4149498560666738e-73, 
7.074749280333369e-74,
+    3.5373746401666845e-74, 1.7686873200833423e-74, 8.8434366004167113e-75, 
4.4217183002083556e-75,
+    2.2108591501041778e-75, 1.1054295750520889e-75, 5.5271478752604446e-76, 
2.7635739376302223e-76,
+    1.3817869688151111e-76, 6.9089348440755557e-77, 3.4544674220377779e-77, 
1.7272337110188889e-77
+];
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn assert_inv_pow2_table() {
+        for i in 0u8..=255 {
+            // expected value
+            let expected = INVERSE_POWERS_OF_2[i as usize];
+
+            // computed value
+            let computed = 2f64.powf(-(i as f64));
+
+            assert_eq!(expected, computed);
+        }
+    }
+}
diff --git a/datasketches/src/common/mod.rs b/datasketches/src/common/mod.rs
index 017ddc9..a43373a 100644
--- a/datasketches/src/common/mod.rs
+++ b/datasketches/src/common/mod.rs
@@ -25,3 +25,17 @@ pub use self::resize::ResizeFactor;
 
 // private to datasketches crate
 pub(crate) mod binomial_bounds;
+pub(crate) mod inv_pow2_table;
+
+/// Canonicalize double value for compatibility with Java
+pub(crate) fn canonical_double(value: f64) -> u64 {
+    if value.is_nan() {
+        // Java's Double.doubleToLongBits() NaN value
+        0x7ff8000000000000u64
+    } else {
+        // -0.0 + 0.0 == +0.0 under IEEE754 roundTiesToEven rounding mode,
+        // which Rust guarantees. Thus, by adding a positive zero we
+        // canonicalize signed zero without any branches in one instruction.
+        (value + 0.0).to_bits()
+    }
+}
diff --git a/datasketches/src/common/num_std_dev.rs 
b/datasketches/src/common/num_std_dev.rs
index 5cae235..92cbddd 100644
--- a/datasketches/src/common/num_std_dev.rs
+++ b/datasketches/src/common/num_std_dev.rs
@@ -50,4 +50,9 @@ impl NumStdDev {
     pub const fn tail_probability(&self) -> f64 {
         DELTA_OF_NUM_STD_DEVS[*self as usize]
     }
+
+    /// Returns the number of standard deviations as an `u8`.
+    pub const fn as_u8(&self) -> u8 {
+        *self as u8
+    }
 }
diff --git a/datasketches/src/cpc/estimator.rs 
b/datasketches/src/cpc/estimator.rs
new file mode 100644
index 0000000..6d37e6e
--- /dev/null
+++ b/datasketches/src/cpc/estimator.rs
@@ -0,0 +1,403 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::f64::consts::LN_2;
+
+use crate::common::NumStdDev;
+
+const ICON_ERROR_CONSTANT: f64 = LN_2;
+
+const ICON_LOW_SIDE_DATA: [u16; 33] = [
+    //1,    2,    3,   kappa
+    //                 lgK num trials
+    6037, 5720, 5328, // 4 1000000
+    6411, 6262, 5682, // 5 1000000
+    6724, 6403, 6127, // 6 1000000
+    6665, 6411, 6208, // 7 1000000
+    6959, 6525, 6427, // 8 1000000
+    6892, 6665, 6619, // 9 1000000
+    6792, 6752, 6690, // 10 1000000
+    6899, 6818, 6708, // 11 1000000
+    6871, 6845, 6812, // 12 1046369
+    6909, 6861, 6828, // 13 1043411
+    6919, 6897, 6842, // 14 1000297
+];
+
+const ICON_HIGH_SIDE_DATA: [u16; 33] = [
+    //1,    2,    3,   kappa
+    //                 lgK num trials
+    8031, 8559, 9309, // 4 1000000
+    7084, 7959, 8660, // 5 1000000
+    7141, 7514, 7876, // 6 1000000
+    7458, 7430, 7572, // 7 1000000
+    6892, 7141, 7497, // 8 1000000
+    6889, 7132, 7290, // 9 1000000
+    7075, 7118, 7185, // 10 1000000
+    7040, 7047, 7085, // 11 1000000
+    6993, 7019, 7053, // 12 1046369
+    6953, 7001, 6983, // 13 1043411
+    6944, 6966, 7004, // 14 1000297
+];
+
+#[allow(clippy::excessive_precision)]
+const HIP_ERROR_CONSTANT: f64 = 0.588705011257737332; // (LN_2 / 2.0).sqrt()
+
+const HIP_LOW_SIDE_DATA: [u16; 33] = [
+    //1,    2,    3,   kappa
+    //                 lgK num trials
+    5871, 5247, 4826, // 4 1000000
+    5877, 5403, 5070, // 5 1000000
+    5873, 5533, 5304, // 6 1000000
+    5878, 5632, 5464, // 7 1000000
+    5874, 5690, 5564, // 8 1000000
+    5880, 5745, 5619, // 9 1000000
+    5875, 5784, 5701, // 10 1000000
+    5866, 5789, 5742, // 11 1000000
+    5869, 5827, 5784, // 12 1046369
+    5876, 5860, 5827, // 13 1043411
+    5881, 5853, 5842, // 14 1000297
+];
+
+const HIP_HIGH_SIDE_DATA: [u16; 33] = [
+    //1,    2,    3,   kappa
+    //                 lgK num trials
+    5855, 6688, 7391, // 4 1000000
+    5886, 6444, 6923, // 5 1000000
+    5885, 6254, 6594, // 6 1000000
+    5889, 6134, 6326, // 7 1000000
+    5900, 6072, 6203, // 8 1000000
+    5875, 6005, 6089, // 9 1000000
+    5871, 5980, 6040, // 10 1000000
+    5889, 5941, 6015, // 11 1000000
+    5871, 5926, 5973, // 12 1046369
+    5866, 5901, 5915, // 13 1043411
+    5880, 5914, 5953, // 14 1000297
+];
+
+pub(super) fn icon_confidence_lb(lg_k: u8, num_coupons: u32, kappa: NumStdDev) 
-> f64 {
+    if num_coupons == 0 {
+        return 0.0;
+    }
+
+    let k = (1 << lg_k) as f64;
+    let kappa = kappa.as_u8();
+
+    let mut x = ICON_ERROR_CONSTANT;
+    if lg_k <= 14 {
+        let idx = (3 * (lg_k - 4) + (kappa - 1)) as usize;
+        x = (ICON_HIGH_SIDE_DATA[idx] as f64) / 10000.0;
+    }
+    let rel = x / k.sqrt();
+    let eps = (kappa as f64) * rel;
+    let est = icon_estimate(lg_k, num_coupons);
+    let result = est / (1.0 + eps);
+    if result < (num_coupons as f64) {
+        num_coupons as f64
+    } else {
+        result
+    }
+}
+
+pub(super) fn icon_confidence_ub(lg_k: u8, num_coupons: u32, kappa: NumStdDev) 
-> f64 {
+    if num_coupons == 0 {
+        return 0.0;
+    }
+
+    let k = (1 << lg_k) as f64;
+    let kappa = kappa.as_u8();
+
+    let mut x = ICON_ERROR_CONSTANT;
+    if lg_k <= 14 {
+        let idx = (3 * (lg_k - 4) + (kappa - 1)) as usize;
+        x = (ICON_LOW_SIDE_DATA[idx] as f64) / 10000.0;
+    }
+    let rel = x / k.sqrt();
+    let eps = (kappa as f64) * rel;
+    let est = icon_estimate(lg_k, num_coupons);
+    let result = est / (1.0 - eps);
+    result.ceil() // slight widening of interval to be conservative
+}
+
+pub(super) fn hip_confidence_lb(
+    lg_k: u8,
+    num_coupons: u32,
+    hip_est_accum: f64,
+    kappa: NumStdDev,
+) -> f64 {
+    if num_coupons == 0 {
+        return 0.0;
+    }
+
+    let k = (1 << lg_k) as f64;
+    let kappa = kappa.as_u8();
+
+    let mut x = HIP_ERROR_CONSTANT;
+    if lg_k <= 14 {
+        let idx = (3 * (lg_k - 4) + (kappa - 1)) as usize;
+        x = (HIP_HIGH_SIDE_DATA[idx] as f64) / 10000.0;
+    }
+    let rel = x / k.sqrt();
+    let eps = (kappa as f64) * rel;
+    let result = hip_est_accum / (1.0 + eps);
+    if result < (num_coupons as f64) {
+        num_coupons as f64
+    } else {
+        result
+    }
+}
+
+pub(super) fn hip_confidence_ub(
+    lg_k: u8,
+    num_coupons: u32,
+    hip_est_accum: f64,
+    kappa: NumStdDev,
+) -> f64 {
+    if num_coupons == 0 {
+        return 0.0;
+    }
+
+    let k = (1 << lg_k) as f64;
+    let kappa = kappa.as_u8();
+
+    let mut x = HIP_ERROR_CONSTANT;
+    if lg_k <= 14 {
+        let idx = (3 * (lg_k - 4) + (kappa - 1)) as usize;
+        x = (HIP_LOW_SIDE_DATA[idx] as f64) / 10000.0;
+    }
+    let rel = x / k.sqrt();
+    let eps = (kappa as f64) * rel;
+    let result = hip_est_accum / (1.0 - eps);
+    result.ceil() // widening for coverage
+}
+
+// The ICON estimator for FM85 sketches is defined by the arXiv paper.
+//
+// The current file provides exact and approximate implementations of this 
estimator. The exact
+// version works for any value of K, but is quite slow.
+//
+// The much faster approximate version works for K values that are powers of 
two ranging from 2^4
+// to 2^32.
+//
+// At a high-level, this approximation can be described as using an 
exponential approximation when
+// C > K * (5.6 or 5.7), while smaller values of C are handled by a degree-19 
polynomial
+// approximation of a pre-conditioned version of the true ICON mapping from C 
to N_hat.
+//
+// This file also provides a validation procedure that compares its 
approximate and exact
+// implementations of the FM85 ICON estimator.
+
+const ICON_MIN_LOG_K: usize = 4;
+const ICON_MAX_LOG_K: usize = 26;
+const ICON_POLYNOMIAL_DEGREE: usize = 19;
+const ICON_POLYNOMIAL_NUM_COEFFICIENTS: usize = 1 + ICON_POLYNOMIAL_DEGREE;
+const ICON_TABLE_SIZE: usize =
+    ICON_POLYNOMIAL_NUM_COEFFICIENTS * (1 + (ICON_MAX_LOG_K - ICON_MIN_LOG_K));
+
+#[rustfmt::skip]
+#[allow(clippy::excessive_precision)]
+const ICON_POLYNOMIAL_COEFFICIENTS: [f64; ICON_TABLE_SIZE] = [
+    // log K = 4
+    0.9895027971889700513, 0.3319496644645180128, 0.1242818722715769986, 
-0.03324149686026930256, -0.2985637298081619817,
+    1.366555923595830002, -4.705499366260569971, 11.61506432505530029, 
-21.11254986175579873, 28.89421695078809904,
+    -30.1383659011730991, 24.11946778830730054, -14.83391445199539938, 
6.983088767267210173, -2.48964120264876998,
+    0.6593243603602499947, -0.125493534558034997, 0.01620971672896159843, 
-0.001271267679036929953, 4.567178653294529745e-05,
+
+    // log K = 5
+    0.9947713741300230339, 0.3326559581620939787, 0.1250050661634889981, 
-0.04130073804472530336, -0.2584095537451129854,
+    1.218050389433120051, -4.319106696095399656, 10.87175052045090062, 
-20.0184979022142997, 27.63210188163320069,
+    -28.97950009664030091, 23.26740804691930009, -14.33375703270860058, 
6.751281271241110105, -2.406363094133439962,
+    0.6367414734718820357, -0.1210468076141379967, 0.01561196698118279963, 
-0.001222335432128580056, 4.383502970318410206e-05,
+
+    // log K = 6
+    0.9973904854982870161, 0.3330148852217920119, 0.125251536589509993, 
-0.04434075124043219962, -0.2436238890691720116,
+    1.163293254754570016, -4.177758779777369647, 10.60301981340099964, 
-19.6274507428828997, 27.18420839597660077,
+    -28.56827214174580121, 22.96268674086600114, -14.15234202220280046, 
6.665700662642549901, -2.375043356720739851,
+    0.6280993991240929608, -0.119319019358031006, 0.01537674055733759954, 
-0.001202881695730769916, 4.309894633186929849e-05,
+
+    // log K = 7
+    0.9986963310058679655, 0.3331956705633329907, 0.125337696770523005, 
-0.04546817338088020299, -0.2386752211125199863,
+    1.145927328111949972, -4.135694445582720036, 10.52805060502839929, 
-19.52408322548339825, 27.06921653903929936,
+    -28.46207532143190022, 22.88083524357429965, -14.10057147392659971, 
6.63958754983273991, -2.364865219283200037,
+    0.6251341806425250169, -0.1186991327450530043, 0.0152892726403408008, 
-0.001195439764873199896, 4.281098416794090072e-05,
+
+    // log K = 8
+    0.999348600452531044, 0.3332480372393080148, 0.126666900963325002, 
-0.06495714694254159371, -0.08376282050638980681,
+    0.3760158094643630267, -1.568204791601850001, 4.483117719555970382, 
-9.119180124379150598, 13.65799293358900002,
+    -15.3100211234349004, 12.97546344654869976, -8.351661538536939489, 
4.075022612435580172, -1.49387015887069996,
+    0.4040976870253379927, -0.07813232681879349328, 0.01020545649538820085, 
-0.0008063279210812720381, 2.909334976414100078e-05,
+
+    // log K = 9
+    0.9996743787297059924, 0.3332925779481850093, 0.1267124599259649986, 
-0.06550452970936600228, -0.08191738117533520214,
+    0.3773034458363569987, -1.604679509609959975, 4.636761898691969641, 
-9.487348609558699408, 14.25164235443030059,
+    -15.99674955529870068, 13.56353219046370029, -8.730194904342459594, 
4.259010067932120336, -1.56106689792022002,
+    0.4222540912786589828, -0.08165296504921559784, 0.01066878484925220041, 
-0.0008433887618256910015, 3.045339724886519912e-05,
+
+    // log K = 10
+    0.999837191783945034, 0.3333142252339619804, 0.1267759538087240012, 
-0.06631005632753710077, -0.07692759158286699428,
+    0.3568943956395980166, -1.546598721379510044, 4.51595019978557044, 
-9.298431968763770428, 14.02586858080080034,
+    -15.78858959520439953, 13.41484931677589998, -8.647958125130809748, 
4.22398017468472009, -1.549708891200570093,
+    0.419507410264540026, -0.08117411611046250475, 0.01061202286184199928, 
-0.000839300527596772007, 3.03185874520205985e-05,
+
+    // log K = 11
+    0.9999186020796150265, 0.3333249054574359826, 0.126791713589799987, 
-0.06662487271699729652, -0.07335552427910230211,
+    0.3316370184815959909, -1.434143797561290068, 4.180260309967409604, 
-8.593906870708760692, 12.95088874800289958,
+    -14.56876092520539956, 12.37074367531410068, -7.969152075707960137, 
3.888774396648960074, -1.424923326506990051,
+    0.385084561785229984, -0.07435541911616409816, 0.009695363567476529554, 
-0.0007644375960047160388, 2.75156194717188011e-05,
+
+    // log K = 12
+    0.9999592955649559967, 0.3333310560725140093, 0.1267379744020450116, 
-0.06524495415766619344, -0.08854031542298740343,
+    0.4244320628874230228, -1.794077789033230008, 5.133875262768450298, 
-10.40149374917120007, 15.47808115629240078,
+    -17.2272296137545986, 14.5002173676463002, -9.274819801602760094, 
4.500782540026570189, -1.642359389030050076,
+    0.442596113445525019, -0.0853226219238850947, 0.01111969379054169975, 
-0.0008771614088006969611, 3.161668519459719752e-05,
+
+    // log K = 13
+    0.9999796468102559732, 0.3333336602394039727, 0.126728089053198989, 
-0.06503798598282370391, -0.09050261023823169548,
+    0.4350609244189960201, -1.831274835815670077, 5.223387516985289913, 
-10.55574395269979959, 15.67359470222429962,
+    -17.41263416341029924, 14.63297400889229927, -9.346752431221359458, 
4.530124905188380069, -1.651245566462089975,
+    0.444542549250713015, -0.08561720963336499901, 0.01114805146185449992, 
-0.0008786251203363140043, 3.16416341644572998e-05,
+
+    // log K = 14
+    0.9999898187060970445, 0.3333362579300819806, 0.1266984078369459976, 
-0.06464561179765909715, -0.09343280886228019777,
+    0.4490702549264070087, -1.878087608052450008, 5.338004322057390283, 
-10.76690603590630069, 15.97069195083200022,
+    -17.73440379943459888, 14.90212518309260048, -9.520506013770420495, 
4.616238931978830173, -1.68364817877918993,
+    0.4536194960681350086, -0.087448605434800597, 0.01139929991331390009, 
-0.0008995891451622229631, 3.244407259782900338e-05,
+
+    // log K = 15
+    0.9999949072549390028, 0.3333376334705290267, 0.126665364358402005, 
-0.06411790034705669439, -0.09776009134670660128,
+    0.4704691112248470253, -1.948021675295769972, 5.497760972696490001, 
-11.03165645315390009, 16.29703330781000048,
+    -18.03851029448010124, 15.11836776139680083, -9.638205179917429533, 
4.665122328753120051, -1.698980686525759953,
+    0.4571799506245269873, -0.08804011353783609828, 0.01146553155965330043, 
-0.0009040455800659569869, 3.257931866957050274e-05,
+
+    // log K = 16
+    0.9999974544793589493, 0.3333381337614599871, 0.1266524862971120102, 
-0.06391676499117690535, -0.09929616211306059592,
+    0.4771390820378790254, -1.965762451227349938, 5.526802350376460282, 
-11.05703067024660058, 16.29535848023060041,
+    -18.00114005075790047, 15.06214012231560062, -9.58874727382628933, 
4.63537541652793017, -1.686222848555620102,
+    0.4532602373715179933, -0.08719448925964939923, 0.01134365425717459921, 
-0.0008934965241274289835, 3.216436244471380105e-05,
+
+    // log K = 17
+    0.9999987278278800185, 0.3333383411464330148, 0.126642761751724009, 
-0.06371042959073920653, -0.1013564516034080043,
+    0.4891311195679299839, -2.010971712051409899, 5.644390807952309963, 
-11.27697253921500042, 16.59957157207080058,
+    -18.31808338317799922, 15.31363518393730061, -9.741451446816620674, 
4.706207545519429658, -1.711102469010010063,
+    0.4597587341089349744, -0.08841670767182820134, 0.01149999225097850068, 
-0.0009056651366963050422, 3.259910736274500059e-05,
+
+    // log K = 18
+    0.9999993637727100371, 0.3333385511608860097, 0.1266341580529160016, 
-0.06353272828164230335, -0.103139962850642003,
+    0.4996216017206500104, -2.05099128585287982, 5.749874086531799655, 
-11.47727638570349917, 16.88141587810320132,
+    -18.61744656177490143, 15.55634230427719977, -9.892350736128680211, 
4.778033520984200422, -1.737045483861280104,
+    0.4667410882683730167, -0.08977256212421590165, 0.01167940146667079994, 
-0.0009201381242396030127, 3.313600701586759867e-05,
+
+    // log K = 19
+    0.9999996805376010212, 0.3333372324328989778, 0.1267104737214659882, 
-0.06504749929326139601, -0.0882341962464350954,
+    0.4131871162041140244, -1.725190703567099915, 4.900817515593920426, 
-9.883452720776510603, 14.6657081190816001,
+    -16.29398295135089825, 13.69805011761319946, -8.753475239465899449, 
4.244072374564439976, -1.547202527706629915,
+    0.4164770109614310267, -0.08017596922092029565, 0.01043146101701039954, 
-0.00082124200571200305, 2.953319493719429935e-05,
+
+    // log K = 20
+    0.9999998390037539986, 0.3333365859956040067, 0.1267460211029839967, 
-0.06569456024647769843, -0.0823070353477164951,
+    0.3810826463303410017, -1.611983580241109992, 4.624520077758210057, 
-9.397308335633589138, 14.03184981378050011,
+    -15.6703191315401007, 13.22992718704790072, -8.484216393184780713, 
4.125607133488029987, -1.507690650697159906,
+    0.4066678517577320129, -0.07842110121777939868, 0.01021780862225150042, 
-0.0008054065857047439754, 2.899431830426989844e-05,
+
+    // log K = 21
+    0.9999999207001479817, 0.3333384953015239849, 0.1266331480396669928, 
-0.06345750166298599892, -0.1042341210992499961,
+    0.5077112908497130039, -2.087398133609810191, 5.858842546192500222, 
-11.70620319777190055, 17.23103975433669888,
+    -19.01462552846669851, 15.89674059836560005, -10.11395134034419918, 
4.88760796465891989, -1.777886770904629987,
+    0.4780200178339499839, -0.09200895321782050218, 0.01198029553244219989, 
-0.0009447283875782100165, 3.405716775824710232e-05,
+
+    // log K = 22
+    0.9999999606908690497, 0.3333383929524300071, 0.1266456445096819927, 
-0.06373504294081690225, -0.1012834291081849969,
+    0.4893810690172959998, -2.01391428223606983, 5.656430437473649597, 
-11.3067201537791, 16.64980594135310099,
+    -18.3792355790383013, 15.36879753115040081, -9.778831246425049528, 
4.725308061988969577, -1.718423596500280093,
+    0.4618308177809870019, -0.08883675060799739454, 0.01155766944804260087, 
-0.0009104695617243750358, 3.278237729674439666e-05,
+
+    // log K = 23
+    0.9999999794683379628, 0.3333386441751680085, 0.1266463995182049995, 
-0.06376031920455070556, -0.1010799540803130059,
+    0.488540137426137, -2.012048323537570127, 5.654949475342659682, 
-11.31023240892979942, 16.66334675284959843,
+    -18.40241452866079896, 15.39443572867130072, -9.798844412838670692, 
4.736683907539640082, -1.723168363744929987,
+    0.463270349018644001, -0.08914619066708899531, 0.01160235936257320022, 
-0.0009143600818183229709, 3.293669304679140117e-05,
+
+    // log K = 24
+    0.9999999911469820146, 0.3333376076934529975, 0.1266944349940530012, 
-0.06470524278387919381, -0.09189342220283110152,
+    0.4359182372694809793, -1.815980282951169977, 5.149474056470340066, 
-10.37086570678100017, 15.36962686758569951,
+    -17.05756384717849983, 14.32755177515199918, -9.149944050025640152, 
4.434601894497260055, -1.616478926806520056,
+    0.4351979157055039793, -0.08381768225272340223, 0.01091321820476520016, 
-0.0008600264403629039739, 3.09667800347144002e-05,
+
+    // log K = 25
+    0.9999999968592140354, 0.3333379164881000167, 0.1266782495827009913, 
-0.06434163088961859789, -0.09575258124988890451,
+    0.4597843575354370049, -1.911374431241559924, 5.411856661251520428, 
-10.88850084646090011, 16.12298941380269923,
+    -17.88172178487259956, 15.01301780636859995, -9.585542896142529301, 
4.645811872761620442, -1.693952293156189892,
+    0.4563143308861309921, -0.08795976148455289523, 0.01146560428011200033, 
-0.0009048442931930629528, 3.26358391497329992e-05,
+
+    // log K = 26
+    0.9999999970700530483, 0.333338329556315982, 0.126644753076394001, 
-0.06372365346512399997, -0.1012760856945769949,
+    0.4886852278576360176, -2.009005418394389952, 5.638119224137019714, 
-11.26276715335160006, 16.57640024218650154,
+    -18.29035093605569884, 15.28892246224570073, -9.724916375991760731, 
4.6978877652334603, -1.707974125916829955,
+    0.4588937864564729963, -0.08824617586088029375, 0.01147732114826570046, 
-0.00090384524860747295, 3.253252703695579795e-05,
+];
+
+fn evaluate_polynomial(coefficients: &[f64], start: usize, num: usize, x: f64) 
-> f64 {
+    let end = start + num - 1;
+    let mut total = coefficients[end];
+    for i in (start..end).rev() {
+        total *= x;
+        total += coefficients[i];
+    }
+    total
+}
+
+fn icon_exponential_approximation(k: f64, c: f64) -> f64 {
+    0.7940236163830469 * k * 2f64.powf(c / k)
+}
+
+pub(super) fn icon_estimate(lg_k: u8, num_coupons: u32) -> f64 {
+    let lg_k = lg_k as usize;
+    assert!(
+        (ICON_MIN_LOG_K..=ICON_MAX_LOG_K).contains(&lg_k),
+        "lg_k out of range; got {lg_k}",
+    );
+
+    match num_coupons {
+        0 => return 0.0,
+        1 => return 1.0,
+        _ => {}
+    }
+
+    let k = (1 << lg_k) as f64;
+    let c = num_coupons as f64;
+
+    // Differing thresholds ensure that the approximated estimator is 
monotonically increasing.
+    let threshold_factor = if lg_k < 14 { 5.7 } else { 5.6 };
+    if c > threshold_factor * k {
+        return icon_exponential_approximation(k, c);
+    }
+
+    let factor = evaluate_polynomial(
+        &ICON_POLYNOMIAL_COEFFICIENTS,
+        ICON_POLYNOMIAL_NUM_COEFFICIENTS * (lg_k - ICON_MIN_LOG_K),
+        ICON_POLYNOMIAL_NUM_COEFFICIENTS,
+        // The constant 2.0 is baked into the table 
ICON_POLYNOMIAL_COEFFICIENTS.
+        // This factor, although somewhat arbitrary, is based on extensive 
characterization studies
+        // and is considered a safe conservative factor.
+        c / (2.0 * k),
+    );
+    let ratio = c / k;
+    // The constant 66.774757 is baked into the table 
ICON_POLYNOMIAL_COEFFICIENTS.
+    // This factor, although somewhat arbitrary, is based on extensive 
characterization studies
+    // and is considered a safe conservative factor.
+    let term = 1.0 + (ratio * ratio * ratio / 66.774757);
+    let result = c * factor * term;
+    if result >= c { result } else { c }
+}
diff --git a/datasketches/src/cpc/kxp_byte_lookup.rs 
b/datasketches/src/cpc/kxp_byte_lookup.rs
new file mode 100644
index 0000000..25fddf6
--- /dev/null
+++ b/datasketches/src/cpc/kxp_byte_lookup.rs
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub(super) const KXP_BYTE_TABLE: [f64; 256] = [
+    0.99609375, 0.49609375, 0.74609375, 0.24609375, 0.87109375, 0.37109375, 
0.62109375, 0.12109375,
+    0.93359375, 0.43359375, 0.68359375, 0.18359375, 0.80859375, 0.30859375, 
0.55859375, 0.05859375,
+    0.96484375, 0.46484375, 0.71484375, 0.21484375, 0.83984375, 0.33984375, 
0.58984375, 0.08984375,
+    0.90234375, 0.40234375, 0.65234375, 0.15234375, 0.77734375, 0.27734375, 
0.52734375, 0.02734375,
+    0.98046875, 0.48046875, 0.73046875, 0.23046875, 0.85546875, 0.35546875, 
0.60546875, 0.10546875,
+    0.91796875, 0.41796875, 0.66796875, 0.16796875, 0.79296875, 0.29296875, 
0.54296875, 0.04296875,
+    0.94921875, 0.44921875, 0.69921875, 0.19921875, 0.82421875, 0.32421875, 
0.57421875, 0.07421875,
+    0.88671875, 0.38671875, 0.63671875, 0.13671875, 0.76171875, 0.26171875, 
0.51171875, 0.01171875,
+    0.98828125, 0.48828125, 0.73828125, 0.23828125, 0.86328125, 0.36328125, 
0.61328125, 0.11328125,
+    0.92578125, 0.42578125, 0.67578125, 0.17578125, 0.80078125, 0.30078125, 
0.55078125, 0.05078125,
+    0.95703125, 0.45703125, 0.70703125, 0.20703125, 0.83203125, 0.33203125, 
0.58203125, 0.08203125,
+    0.89453125, 0.39453125, 0.64453125, 0.14453125, 0.76953125, 0.26953125, 
0.51953125, 0.01953125,
+    0.97265625, 0.47265625, 0.72265625, 0.22265625, 0.84765625, 0.34765625, 
0.59765625, 0.09765625,
+    0.91015625, 0.41015625, 0.66015625, 0.16015625, 0.78515625, 0.28515625, 
0.53515625, 0.03515625,
+    0.94140625, 0.44140625, 0.69140625, 0.19140625, 0.81640625, 0.31640625, 
0.56640625, 0.06640625,
+    0.87890625, 0.37890625, 0.62890625, 0.12890625, 0.75390625, 0.25390625, 
0.50390625, 0.00390625,
+    0.9921875, 0.4921875, 0.7421875, 0.2421875, 0.8671875, 0.3671875, 
0.6171875, 0.1171875,
+    0.9296875, 0.4296875, 0.6796875, 0.1796875, 0.8046875, 0.3046875, 
0.5546875, 0.0546875,
+    0.9609375, 0.4609375, 0.7109375, 0.2109375, 0.8359375, 0.3359375, 
0.5859375, 0.0859375,
+    0.8984375, 0.3984375, 0.6484375, 0.1484375, 0.7734375, 0.2734375, 
0.5234375, 0.0234375,
+    0.9765625, 0.4765625, 0.7265625, 0.2265625, 0.8515625, 0.3515625, 
0.6015625, 0.1015625,
+    0.9140625, 0.4140625, 0.6640625, 0.1640625, 0.7890625, 0.2890625, 
0.5390625, 0.0390625,
+    0.9453125, 0.4453125, 0.6953125, 0.1953125, 0.8203125, 0.3203125, 
0.5703125, 0.0703125,
+    0.8828125, 0.3828125, 0.6328125, 0.1328125, 0.7578125, 0.2578125, 
0.5078125, 0.0078125,
+    0.984375, 0.484375, 0.734375, 0.234375, 0.859375, 0.359375, 0.609375, 
0.109375, 0.921875,
+    0.421875, 0.671875, 0.171875, 0.796875, 0.296875, 0.546875, 0.046875, 
0.953125, 0.453125,
+    0.703125, 0.203125, 0.828125, 0.328125, 0.578125, 0.078125, 0.890625, 
0.390625, 0.640625,
+    0.140625, 0.765625, 0.265625, 0.515625, 0.015625, 0.96875, 0.46875, 
0.71875, 0.21875, 0.84375,
+    0.34375, 0.59375, 0.09375, 0.90625, 0.40625, 0.65625, 0.15625, 0.78125, 
0.28125, 0.53125,
+    0.03125, 0.9375, 0.4375, 0.6875, 0.1875, 0.8125, 0.3125, 0.5625, 0.0625, 
0.875, 0.375, 0.625,
+    0.125, 0.75, 0.25, 0.5, 0.0,
+];
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::common::inv_pow2_table::INVERSE_POWERS_OF_2;
+
+    #[test]
+    fn assert_kxp_byte_table() {
+        for byte in 0u8..=255 {
+            // expected value
+            let expected = KXP_BYTE_TABLE[byte as usize];
+
+            // computed value
+            let mut computed = 0.0;
+            for col in 0..8 {
+                let bit = (byte >> col) & 1;
+                if bit == 0 {
+                    // note the inverted logic
+                    computed += INVERSE_POWERS_OF_2[col + 1]; //note the "+1"
+                }
+            }
+
+            assert_eq!(expected, computed);
+        }
+    }
+}
diff --git a/datasketches/src/cpc/mod.rs b/datasketches/src/cpc/mod.rs
new file mode 100644
index 0000000..4c9fea2
--- /dev/null
+++ b/datasketches/src/cpc/mod.rs
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#![allow(dead_code)]
+
+//! Compressed Probabilistic Counting sketch.
+//!
+//! This is a unique-counting sketch that implements the Compressed 
Probabilistic Counting (CPC,
+//! a.k.a. FM85) algorithms developed by Kevin Lang in his paper [Back to the 
Future: an Even More
+//! Nearly Optimal Cardinality Estimation 
Algorithm](https://arxiv.org/abs/1708.06839).
+//!
+//! This sketch is extremely space-efficient when serialized. In an 
apples-to-apples empirical
+//! comparison against compressed HyperLogLog sketches, this new algorithm 
simultaneously wins on
+//! the two dimensions of the space/accuracy tradeoff and produces sketches 
that are smaller than
+//! the entropy of HLL, so no possible implementation of compressed HLL can 
match its space
+//! efficiency for a given accuracy. As described in the paper this sketch 
implements a newly
+//! developed ICON estimator algorithm that survives union operations, another 
well-known estimator,
+//! the [Historical Inverse Probability 
(HIP)](https://arxiv.org/abs/1306.3284) estimator does not.
+//!
+//! The update speed performance of this sketch is quite fast and is 
comparable to the speed of HLL.
+//! The union (merging) capability of this sketch also allows for merging of 
sketches with different
+//! configurations of K.
+//!
+//! For additional security this sketch can be configured with a 
user-specified hash seed.
+
+mod estimator;
+mod kxp_byte_lookup;
+mod pair_table;
+mod sketch;
+mod union;
+
+pub use self::sketch::CpcSketch;
+pub use self::union::CpcUnion;
+
+/// Default log2 of K.
+const DEFAULT_LG_K: u8 = 11;
+/// Min log2 of K.
+const MIN_LG_K: usize = 4;
+/// Max log2 of K.
+const MAX_LG_K: usize = 26;
+
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+#[expect(clippy::upper_case_acronyms)]
+enum Flavor {
+    EMPTY,   //    0  == C <    1
+    SPARSE,  //    1  <= C <   3K/32
+    HYBRID,  // 3K/32 <= C <   K/2
+    PINNED,  //   K/2 <= C < 27K/8  [NB: 27/8 = 3 + 3/8]
+    SLIDING, // 27K/8 <= C
+}
+
+fn count_bits_set_in_matrix(matrix: &[u64]) -> u32 {
+    let mut count = 0;
+    for word in matrix {
+        count += word.count_ones();
+    }
+    count
+}
+
+fn determine_flavor(lg_k: u8, num_coupons: u32) -> Flavor {
+    let k = 1 << lg_k;
+    let c2 = num_coupons << 1;
+    let c8 = num_coupons << 3;
+    let c32 = num_coupons << 5;
+    if num_coupons == 0 {
+        Flavor::EMPTY
+    } else if c32 < (3 * k) {
+        Flavor::SPARSE
+    } else if c2 < k {
+        Flavor::HYBRID
+    } else if c8 < (27 * k) {
+        Flavor::PINNED
+    } else {
+        Flavor::SLIDING
+    }
+}
+
+fn determine_correct_offset(lg_k: u8, num_coupons: u32) -> u8 {
+    let k = 1 << lg_k;
+    let tmp = ((num_coupons as i64) << 3) - (19 * k); // 8C - 19K
+    if tmp < 0 {
+        0
+    } else {
+        (tmp >> (lg_k + 3)) as u8 // tmp / 8K
+    }
+}
diff --git a/datasketches/src/cpc/pair_table.rs 
b/datasketches/src/cpc/pair_table.rs
new file mode 100644
index 0000000..2af1859
--- /dev/null
+++ b/datasketches/src/cpc/pair_table.rs
@@ -0,0 +1,346 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+const UPSIZE_NUMERATOR: u32 = 3;
+const UPSIZE_DENOMINATOR: u32 = 4;
+const DOWNSIZE_NUMERATOR: u32 = 1;
+const DOWNSIZE_DENOMINATOR: u32 = 4;
+
+/// A highly specialized hash table used for sparse data.
+///
+/// This table stores `(row, col)` pairs and uses linear probing for collision 
resolution. It is
+/// optimized for scenarios where the cardinality of entries is low.
+#[derive(Debug, Clone)]
+pub(super) struct PairTable {
+    /// log2 of number of slots
+    lg_size: u8,
+    num_valid_bits: u8,
+    num_items: u32,
+    slots: Vec<u32>,
+}
+
+impl PairTable {
+    pub fn new(lg_size: u8, num_valid_bits: u8) -> Self {
+        assert!(
+            (2..=26).contains(&lg_size),
+            "lg_size must be in [2, 26], got {lg_size}"
+        );
+        assert!(
+            ((lg_size + 1)..=32).contains(&num_valid_bits),
+            "num_valid_bits must be in [lg_size + 1, 32], got {num_valid_bits} 
where lg_size = {lg_size}"
+        );
+        Self {
+            lg_size,
+            num_valid_bits,
+            num_items: 0,
+            slots: vec![u32::MAX; 1 << lg_size],
+        }
+    }
+
+    /// A constructor specifically tailored to be a part of FM85 decompression 
scheme.
+    pub fn from_slots(lg_size: u8, num_items: u32, slots: Vec<u32>) -> Self {
+        let mut lg_num_slots = 2;
+        while UPSIZE_DENOMINATOR * num_items > (UPSIZE_NUMERATOR * (1 << 
lg_num_slots)) {
+            lg_num_slots += 1;
+        }
+
+        let mut table = Self::new(lg_num_slots, 6 + lg_size);
+
+        // Note: there is a possible "snowplow effect" here because the caller 
is passing in a
+        // sorted pairs array. However, we are starting out with the correct 
final table size, so
+        // the problem might not occur.
+
+        for slot in slots {
+            table.must_insert(slot);
+        }
+        table.num_items = num_items;
+        table
+    }
+
+    pub fn num_items(&self) -> u32 {
+        self.num_items
+    }
+
+    pub fn slots(&self) -> &[u32] {
+        &self.slots
+    }
+
+    pub fn clear(&mut self) {
+        self.slots.fill(u32::MAX);
+        self.num_items = 0;
+    }
+
+    pub fn maybe_delete(&mut self, item: u32) -> bool {
+        let index = self.lookup(item) as usize;
+        if self.slots[index] == u32::MAX {
+            return false;
+        }
+        assert_eq!(
+            self.slots[index], item,
+            "item {item} not found at index {index}"
+        );
+        assert!(self.num_items > 0, "no items to delete");
+
+        // delete the item
+        self.slots[index] = u32::MAX;
+        self.num_items -= 1;
+
+        // re-insert all items between the freed slot and the next empty slot
+        let mask = (1 << self.lg_size) - 1;
+        let mut probe = (index + 1) & mask;
+        let mut fetched = self.slots[probe];
+        while fetched != u32::MAX {
+            self.slots[probe] = u32::MAX;
+            self.must_insert(fetched);
+            probe = (probe + 1) & mask;
+            fetched = self.slots[probe];
+        }
+
+        // shrink if necessary
+        while ((DOWNSIZE_DENOMINATOR * self.num_items) < (DOWNSIZE_NUMERATOR * 
(1 << self.lg_size)))
+            && (self.lg_size > 2)
+        {
+            self.rebuild(self.lg_size - 1);
+        }
+
+        true
+    }
+
+    pub fn maybe_insert(&mut self, item: u32) -> bool {
+        let index = self.lookup(item) as usize;
+        if self.slots[index] == item {
+            return false;
+        }
+        assert_eq!(
+            self.slots[index],
+            u32::MAX,
+            "no empty slot found for item {item} at index {index}"
+        );
+        self.slots[index] = item;
+        self.num_items += 1;
+        while (UPSIZE_DENOMINATOR * self.num_items) > (UPSIZE_NUMERATOR * (1 
<< self.lg_size)) {
+            self.rebuild(self.lg_size + 1);
+        }
+        true
+    }
+
+    /// While extracting the items from a linear probing hashtable, this will 
usually undo the
+    /// wrap-around provided that the table isn't too full.
+    ///
+    /// Experiments suggest that for sufficiently large tables the load factor 
would have to be
+    /// over 90 percent before this would fail frequently, and even then the 
subsequent sort would
+    /// fix things up.
+    ///
+    /// The result is nearly sorted, so make sure to use an efficient sort for 
that case.
+    pub fn unwrapping_get_items(&self) -> Vec<u32> {
+        if self.slots.is_empty() {
+            return vec![];
+        }
+
+        let table_size = 1usize << self.lg_size;
+        let mut result = vec![0; self.num_items as usize];
+        let mut i = 0usize;
+        let mut l = 0usize;
+        let mut r = self.num_items as usize - 1;
+
+        // special rules for the region before the first empty slot
+        let hi_bit = 1 << (self.num_valid_bits - 1);
+        while i < table_size && self.slots[i] != u32::MAX {
+            let item = self.slots[i];
+            i += 1;
+            if (item & hi_bit) != 0 {
+                // this item was probably wrapped, so move to end
+                result[r] = item;
+                r -= 1;
+            } else {
+                result[l] = item;
+                l += 1;
+            }
+        }
+
+        // the rest of the table is processed normally
+        while i < table_size {
+            let item = self.slots[i];
+            i += 1;
+            if item != u32::MAX {
+                result[l] = item;
+                l += 1;
+            }
+        }
+
+        assert_eq!(l, r + 1, "number of items mismatch during extraction");
+        result
+    }
+
+    fn must_insert(&mut self, item: u32) {
+        let index = self.lookup(item) as usize;
+        assert_ne!(
+            self.slots[index], item,
+            "item {item} already present in table"
+        );
+        assert_eq!(
+            self.slots[index],
+            u32::MAX,
+            "no empty slot found for item {item} at index {index}"
+        );
+        self.slots[index] = item;
+        // counts and resizing must be handled by the caller.
+    }
+
+    fn lookup(&self, item: u32) -> u32 {
+        let size = 1 << self.lg_size;
+        let mask = size - 1;
+
+        let shift = self.num_valid_bits - self.lg_size;
+
+        // extract high table size bits
+        let mut probe = item >> shift;
+        assert!(probe <= mask, "probe = {probe}, mask = {mask}");
+
+        loop {
+            let slot = self.slots[probe as usize];
+            if slot != item && slot != u32::MAX {
+                probe = (probe + 1) & mask;
+            } else {
+                break;
+            }
+        }
+
+        probe
+    }
+
+    /// Rebuilds to a larger size. `num_items` and `num_valid_bits` remain 
unchanged.
+    fn rebuild(&mut self, lg_size: u8) {
+        assert!(
+            (2..=26).contains(&lg_size),
+            "lg_size must be in [2, 26], got {lg_size}"
+        );
+        assert!(
+            ((lg_size + 1)..=32).contains(&self.num_valid_bits),
+            "num_valid_bits must be in [lg_size + 1, 32], got {} where lg_size 
= {lg_size}",
+            self.num_valid_bits
+        );
+
+        let new_size = 1u32 << lg_size;
+        assert!(
+            new_size > self.num_items,
+            "new table size ({new_size}) must be larger than number of items 
{}",
+            self.num_items
+        );
+
+        let slots = std::mem::replace(&mut self.slots, vec![u32::MAX; new_size 
as usize]);
+        self.lg_size = lg_size;
+        for slot in slots {
+            if slot != u32::MAX {
+                self.must_insert(slot);
+            }
+        }
+    }
+}
+
+/// This should be used pair with [`PairTable::unwrapping_get_items`].
+///
+/// In applications where the input array is already nearly sorted, insertion 
sort runs in linear
+/// time with a very small constant.
+///
+/// This introspective version of insertion sort protects against the 
quadratic cost of sorting bad
+/// input arrays. It keeps track of how much work has been done, and if that 
exceeds a constant
+/// times the array length, it switches to a different sorting algorithm.
+pub fn introspective_insertion_sort(a: &mut [u32]) {
+    let cost_limit = 8 * a.len();
+
+    let mut cost = 0;
+    for i in 1..a.len() {
+        let mut j = i;
+        let v = a[i];
+        while j >= 1 && v < a[j - 1] {
+            a[j] = a[j - 1];
+            j -= 1;
+        }
+        a[j] = v;
+        cost += i - j; // distance moved is a measure of work
+        if cost > cost_limit {
+            knuth_shell_sort3(a);
+            return;
+        }
+    }
+}
+
+fn knuth_shell_sort3(a: &mut [u32]) {
+    let len = a.len();
+
+    let mut h = 0;
+    while h < len / 9 {
+        h = 3 * h + 1;
+    }
+
+    while h > 0 {
+        for i in h..len {
+            let v = a[i];
+            let mut j = i;
+            while j >= h && v < a[j - h] {
+                a[j] = a[j - h];
+                j -= h;
+            }
+            a[j] = v;
+        }
+        h /= 3;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_sort_1() {
+        let data = (0..10)
+            .map(|_| rand::random_range(0..10000))
+            .collect::<Vec<_>>();
+        let mut sorted = data.clone();
+        introspective_insertion_sort(&mut sorted);
+        assert!(
+            sorted.is_sorted(),
+            "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
+        );
+    }
+
+    #[test]
+    fn test_sort_2() {
+        let data = (0..10)
+            .map(|_| rand::random_range(0..10000) + 3_000_000_000)
+            .collect::<Vec<_>>();
+        let mut sorted = data.clone();
+        introspective_insertion_sort(&mut sorted);
+        assert!(
+            sorted.is_sorted(),
+            "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
+        );
+    }
+
+    #[test]
+    fn test_sort_3() {
+        let len = 20;
+        let data = (0..len).map(|i| len - i + 1).collect::<Vec<_>>();
+        let mut sorted = data.clone();
+        introspective_insertion_sort(&mut sorted);
+        assert!(
+            sorted.is_sorted(),
+            "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
+        );
+    }
+}
diff --git a/datasketches/src/cpc/sketch.rs b/datasketches/src/cpc/sketch.rs
new file mode 100644
index 0000000..6af8bfc
--- /dev/null
+++ b/datasketches/src/cpc/sketch.rs
@@ -0,0 +1,499 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::hash::Hash;
+
+use crate::common::NumStdDev;
+use crate::common::canonical_double;
+use crate::common::inv_pow2_table::INVERSE_POWERS_OF_2;
+use crate::cpc::DEFAULT_LG_K;
+use crate::cpc::Flavor;
+use crate::cpc::MAX_LG_K;
+use crate::cpc::MIN_LG_K;
+use crate::cpc::count_bits_set_in_matrix;
+use crate::cpc::determine_correct_offset;
+use crate::cpc::determine_flavor;
+use crate::cpc::estimator::hip_confidence_lb;
+use crate::cpc::estimator::hip_confidence_ub;
+use crate::cpc::estimator::icon_confidence_lb;
+use crate::cpc::estimator::icon_confidence_ub;
+use crate::cpc::estimator::icon_estimate;
+use crate::cpc::kxp_byte_lookup::KXP_BYTE_TABLE;
+use crate::cpc::pair_table::PairTable;
+use crate::hash::DEFAULT_UPDATE_SEED;
+use crate::hash::MurmurHash3X64128;
+
+/// A Compressed Probabilistic Counting sketch.
+///
+/// See the [module level documentation](super) for more.
+#[derive(Debug, Clone)]
+pub struct CpcSketch {
+    // immutable config variables
+    lg_k: u8,
+    seed: u64,
+
+    // sketch state
+    /// Part of a speed optimization.
+    pub(super) first_interesting_column: u8,
+    /// The number of coupons collected so far.
+    pub(super) num_coupons: u32,
+    /// Sparse and surprising values.
+    pub(super) surprising_value_table: Option<PairTable>,
+    /// Derivable from num_coupons, but made explicit for speed.
+    pub(super) window_offset: u8,
+    /// Size K bytes in dense mode (flavor >= HYBRID).
+    pub(super) sliding_window: Vec<u8>,
+
+    // estimator state
+    /// Whether the sketch is a result of merging.
+    ///
+    /// If `false`, the HIP (Historical Inverse Probability) estimator is used.
+    /// If `true`, the ICON (Inter-Column Optimal) Estimator is fallback in 
use.
+    pub(super) merge_flag: bool,
+    // the following variables are only valid in HIP estimator
+    /// A pre-calculated probability factor (`k * p`) used to compute the 
increment delta.
+    kxp: f64,
+    /// The accumulated cardinality estimate.
+    hip_est_accum: f64,
+}
+
+impl Default for CpcSketch {
+    fn default() -> Self {
+        Self::new(DEFAULT_LG_K)
+    }
+}
+
+impl CpcSketch {
+    /// Creates a new `CpcSketch` with the given `lg_k` and default seed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `lg_k` is not in the range `[4, 16]`.
+    pub fn new(lg_k: u8) -> Self {
+        Self::with_seed(lg_k, DEFAULT_UPDATE_SEED)
+    }
+
+    /// Creates a new `CpcSketch` with the given `lg_k` and `seed`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `lg_k` is not in the range `[4, 16]`.
+    pub fn with_seed(lg_k: u8, seed: u64) -> Self {
+        assert!(
+            (MIN_LG_K..=MAX_LG_K).contains(&(lg_k as usize)),
+            "lg_k out of range; got {lg_k}",
+        );
+
+        Self {
+            lg_k,
+            seed,
+            first_interesting_column: 0,
+            num_coupons: 0,
+            surprising_value_table: None,
+            window_offset: 0,
+            sliding_window: vec![],
+            merge_flag: false,
+            kxp: (1 << lg_k) as f64,
+            hip_est_accum: 0.0,
+        }
+    }
+
+    /// Return the parameter lg_k.
+    pub fn lg_k(&self) -> u8 {
+        self.lg_k
+    }
+
+    /// Returns the best estimate of the cardinality of the sketch.
+    pub fn estimate(&self) -> f64 {
+        if !self.merge_flag {
+            self.hip_est_accum
+        } else {
+            icon_estimate(self.lg_k, self.num_coupons)
+        }
+    }
+
+    /// Returns the best estimate of the lower bound of the confidence 
interval given `kappa`.
+    pub fn lower_bound(&self, kappa: NumStdDev) -> f64 {
+        if !self.merge_flag {
+            hip_confidence_lb(self.lg_k, self.num_coupons, self.hip_est_accum, 
kappa)
+        } else {
+            icon_confidence_lb(self.lg_k, self.num_coupons, kappa)
+        }
+    }
+
+    /// Returns the best estimate of the upper bound of the confidence 
interval given `kappa`.
+    pub fn upper_bound(&self, kappa: NumStdDev) -> f64 {
+        if !self.merge_flag {
+            hip_confidence_ub(self.lg_k, self.num_coupons, self.hip_est_accum, 
kappa)
+        } else {
+            icon_confidence_ub(self.lg_k, self.num_coupons, kappa)
+        }
+    }
+
+    /// Returns true if the sketch is empty.
+    pub fn is_empty(&self) -> bool {
+        self.num_coupons == 0
+    }
+
+    /// Update the sketch with a hashable value.
+    ///
+    /// For `f32`/`f64` values, use `update_f32`/`update_f64` instead.
+    pub fn update<T: Hash>(&mut self, value: T) {
+        let mut hasher = MurmurHash3X64128::with_seed(self.seed);
+        value.hash(&mut hasher);
+        let (h1, h2) = hasher.finish128();
+
+        let k = 1 << self.lg_k;
+        let col = h2.leading_zeros(); // 0 <= col <= 64
+        let col = if col > 63 { 63 } else { col as u8 }; // clip so that 0 <= 
col <= 63
+        let row = (h1 & (k - 1)) as u32;
+        let mut row_col = (row << 6) | (col as u32);
+        // To avoid the hash table's "empty" value, we change the row of the 
following pair.
+        // This case is extremely unlikely, but we might as well handle it.
+        if row_col == u32::MAX {
+            row_col ^= 1 << 6;
+        }
+        self.row_col_update(row_col);
+    }
+
+    /// Update the sketch with a f64 value.
+    pub fn update_f64(&mut self, value: f64) {
+        // Canonicalize double for compatibility with Java
+        let canonical = canonical_double(value);
+        self.update(canonical);
+    }
+
+    /// Update the sketch with a f32 value.
+    pub fn update_f32(&mut self, value: f32) {
+        self.update_f64(value as f64);
+    }
+
+    pub(super) fn flavor(&self) -> Flavor {
+        determine_flavor(self.lg_k, self.num_coupons)
+    }
+
+    pub(super) fn row_col_update(&mut self, row_col: u32) {
+        let col = (row_col & 63) as u8;
+        if col < self.first_interesting_column {
+            // important speed optimization
+            return;
+        }
+
+        if self.num_coupons == 0 {
+            // promote EMPTY to SPARSE
+            self.surprising_value_table = Some(PairTable::new(2, 6 + 
self.lg_k));
+        }
+
+        if self.sliding_window.is_empty() {
+            self.update_sparse(row_col);
+        } else {
+            self.update_windowed(row_col);
+        }
+    }
+
+    pub(super) fn seed(&self) -> u64 {
+        self.seed
+    }
+
+    pub(super) fn surprising_value_table(&self) -> &PairTable {
+        self.surprising_value_table
+            .as_ref()
+            .expect("surprising value table must be initialized")
+    }
+
+    pub(super) fn mut_surprising_value_table(&mut self) -> &mut PairTable {
+        self.surprising_value_table
+            .as_mut()
+            .expect("surprising value table must be initialized")
+    }
+
+    fn update_hip(&mut self, row_col: u32) {
+        let k = 1 << self.lg_k;
+        let col = (row_col & 63) as usize;
+        let one_over_p = (k as f64) / self.kxp;
+        self.hip_est_accum += one_over_p;
+        self.kxp -= INVERSE_POWERS_OF_2[col + 1] // notice the "+1"
+    }
+
+    fn update_sparse(&mut self, row_col: u32) {
+        let k = 1 << self.lg_k;
+        let c32pre = (self.num_coupons as u64) << 5;
+        debug_assert!(c32pre < 3 * k); // C < 3K/32, in other words, flavor == 
SPARSE
+        let is_novel = self.mut_surprising_value_table().maybe_insert(row_col);
+        if is_novel {
+            self.num_coupons += 1;
+            self.update_hip(row_col);
+            let c32post = (self.num_coupons as u64) << 5;
+            if c32post >= 3 * k {
+                self.promote_sparse_to_windowed();
+            }
+        }
+    }
+
+    fn promote_sparse_to_windowed(&mut self) {
+        debug_assert_eq!(self.window_offset, 0);
+
+        let k = 1 << self.lg_k;
+        let c32 = (self.num_coupons as u64) << 5;
+        debug_assert!((c32 == (3 * k)) || ((self.lg_k == 4) && (c32 > (3 * 
k))));
+
+        self.sliding_window.resize(k as usize, 0);
+
+        let old_table = self
+            .surprising_value_table
+            .replace(PairTable::new(2, 6 + self.lg_k))
+            .expect("surprising value table must be initialized");
+        let old_slots = old_table.slots();
+        for &row_col in old_slots {
+            if row_col != u32::MAX {
+                let col = (row_col & 63) as u8;
+                if col < 8 {
+                    let row = (row_col >> 6) as usize;
+                    self.sliding_window[row] |= 1 << col;
+                } else {
+                    // cannot use must_insert(), because it doesn't provide 
for growth
+                    let is_novel = 
self.mut_surprising_value_table().maybe_insert(row_col);
+                    debug_assert!(is_novel);
+                }
+            }
+        }
+    }
+
+    fn update_windowed(&mut self, row_col: u32) {
+        debug_assert!(self.window_offset <= 56);
+        let k = 1 << self.lg_k;
+        let c32pre = (self.num_coupons as u64) << 5;
+        debug_assert!(c32pre >= 3 * k); // C >= 3K/32, in other words flavor 
>= HYBRID
+        let c8pre = (self.num_coupons as u64) << 3;
+        let w8pre = (self.window_offset as u64) << 3;
+        debug_assert!(c8pre < (27 + w8pre) * k); // C < (K * 27/8) + (K * 
windowOffset)
+
+        let mut is_novel = false; // novel if new coupon;
+        let col = (row_col & 63) as u8;
+        if col < self.window_offset {
+            // track the surprising 0's "before" the window
+            is_novel = 
self.mut_surprising_value_table().maybe_delete(row_col); // inverted logic
+        } else if col < self.window_offset + 8 {
+            // track the 8 bits inside the window
+            let row = (row_col >> 6) as usize;
+            let old_bits = self.sliding_window[row];
+            let new_bits = old_bits | (1 << (col - self.window_offset));
+            if old_bits != new_bits {
+                self.sliding_window[row] = new_bits;
+                is_novel = true;
+            }
+        } else {
+            // track the surprising 1's "after" the window
+            is_novel = 
self.mut_surprising_value_table().maybe_insert(row_col); // normal logic
+        }
+
+        if is_novel {
+            self.num_coupons += 1;
+            self.update_hip(row_col);
+            let c8post = (self.num_coupons as u64) << 3;
+            if c8post >= (27 + w8pre) * k {
+                self.move_window();
+                debug_assert!((1..=56).contains(&self.window_offset));
+                let w8post = (self.window_offset as u64) << 3;
+                debug_assert!(c8post < ((27 + w8post) * k)); // C < (K * 27/8) 
+ (K * windowOffset)
+            }
+        }
+    }
+
+    fn move_window(&mut self) {
+        let new_offset = self.window_offset + 1;
+        debug_assert!(new_offset <= 56);
+        debug_assert_eq!(
+            new_offset,
+            determine_correct_offset(self.lg_k, self.num_coupons)
+        );
+
+        let k = 1 << self.lg_k;
+
+        // Construct the full-sized bit matrix that corresponds to the sketch
+        let bit_matrix = self.build_bit_matrix();
+
+        // refresh the KXP register on every 8th window shift.
+        if (new_offset & 0x7) == 0 {
+            self.refresh_kxp(&bit_matrix);
+        }
+
+        self.mut_surprising_value_table().clear(); // the new number of 
surprises will be about the same
+
+        let mask_for_clearing_window = (0xFF << new_offset) ^ u64::MAX;
+        let mask_for_flipping_early_zone = (1u64 << new_offset) - 1;
+
+        let mut all_surprises_ored = 0u64;
+        for i in 0..k {
+            let mut pattern = bit_matrix[i];
+            self.sliding_window[i] = ((pattern >> new_offset) & 0xff) as u8;
+            pattern &= mask_for_clearing_window;
+            // The following line converts surprising 0's to 1's in the "early 
zone",
+            // (and vice versa, which is essential for this procedure's O(k) 
time cost).
+            pattern ^= mask_for_flipping_early_zone;
+            all_surprises_ored |= pattern; // a cheap way to recalculate 
first_interesting_column
+            while pattern != 0 {
+                let col = pattern.trailing_zeros();
+                pattern ^= 1 << col; // erase the 1
+                let row_col = ((i as u32) << 6) | col;
+                let is_novel = 
self.mut_surprising_value_table().maybe_insert(row_col);
+                debug_assert!(is_novel);
+            }
+        }
+
+        self.window_offset = new_offset;
+        self.first_interesting_column = all_surprises_ored.trailing_zeros() as 
u8;
+        if self.first_interesting_column > new_offset {
+            self.first_interesting_column = new_offset; // corner case
+        }
+    }
+
+    /// The KXP register is a double with roughly 50 bits of precision, but it 
might need roughly
+    /// 90 bits to track the value with perfect accuracy.
+    ///
+    /// Therefore, we recalculate KXP occasionally from the sketch's full 
bit_matrix so that it
+    /// will reflect changes that were previously outside the mantissa.
+    fn refresh_kxp(&mut self, bit_matrix: &[u64]) {
+        // for improved numerical accuracy, we separately sum the bytes of the 
u64's
+        let mut byte_sums = [0.0; 8];
+        for &bit in bit_matrix {
+            let mut word = bit;
+            for sum in byte_sums.iter_mut() {
+                let byte = (word & 0xFF) as usize;
+                *sum += KXP_BYTE_TABLE[byte];
+                word >>= 8;
+            }
+        }
+
+        let mut total = 0.0;
+        for i in (0..8).rev() {
+            // the reverse order is important
+            let factor = INVERSE_POWERS_OF_2[i * 8]; // pow (256.0, (-1.0 * 
((double) j)))
+            total += factor * byte_sums[i];
+        }
+
+        self.kxp = total;
+    }
+
+    pub(super) fn build_bit_matrix(&self) -> Vec<u64> {
+        let k = 1 << self.lg_k;
+        let offset = self.window_offset;
+        debug_assert!(offset <= 56);
+
+        // Fill the matrix with default rows in which the "early zone" is 
filled with ones.
+        // This is essential for the routine's O(k) time cost (as opposed to 
O(C)).
+        let default_row = (1u64 << offset) - 1;
+
+        let mut matrix = vec![default_row; k];
+        if self.num_coupons == 0 {
+            return matrix;
+        }
+
+        if !self.sliding_window.is_empty() {
+            // In other words, we are in window mode, not sparse mode
+            for i in 0..k {
+                // set the window bits, trusting the sketch's current offset
+                matrix[i] |= (self.sliding_window[i] as u64) << offset;
+            }
+        }
+
+        for &row_col in self.surprising_value_table().slots() {
+            if row_col != u32::MAX {
+                let col = (row_col & 63) as u8;
+                let row = (row_col >> 6) as usize;
+                // Flip the specified matrix bit from its default value.
+                // In the "early" zone the bit changes from 1 to 0.
+                // In the "late" zone the bit changes from 0 to 1.
+                matrix[row] ^= 1 << col;
+            }
+        }
+
+        matrix
+    }
+}
+
+impl CpcSketch {
+    /// Returns the estimated maximum compressed serialized size of a sketch.
+    ///
+    /// The actual size of a compressed CPC sketch has a small random 
variance, but the following
+    /// empirically measured size should be large enough for at least 99.9 
percent of sketches.
+    ///
+    /// For small values of `n` the size can be much smaller.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `lg_k` is not in the range `[4, 26]`.
+    pub fn max_serialized_bytes(lg_k: u8) -> usize {
+        let lg_k = lg_k as usize;
+        assert!(
+            (MIN_LG_K..=MAX_LG_K).contains(&lg_k),
+            "lg_k out of range; got {lg_k}",
+        );
+
+        // These empirical values for the 99.9th percentile of size in bytes 
were measured using
+        // 100,000 trials. The value for each trial is the maximum of 5*16=80 
measurements
+        // that were equally spaced over values of the quantity C/K between 
3.0 and 8.0.
+        // This table does not include the worst-case space for the preamble, 
which is added
+        // by the function.
+        const EMPIRICAL_SIZE_MAX_LGK: usize = 19;
+        const EMPIRICAL_MAX_SIZE_BYTES: [usize; 16] = [
+            24,     // lg_k = 4
+            36,     // lg_k = 5
+            56,     // lg_k = 6
+            100,    // lg_k = 7
+            180,    // lg_k = 8
+            344,    // lg_k = 9
+            660,    // lg_k = 10
+            1292,   // lg_k = 11
+            2540,   // lg_k = 12
+            5020,   // lg_k = 13
+            9968,   // lg_k = 14
+            19836,  // lg_k = 15
+            39532,  // lg_k = 16
+            78880,  // lg_k = 17
+            157516, // lg_k = 18
+            314656, // lg_k = 19
+        ];
+        const EMPIRICAL_MAX_SIZE_FACTOR: f64 = 0.6; // 0.6 = 4.8 / 8.0
+        const MAX_PREAMBLE_SIZE_BYTES: usize = 40;
+
+        if lg_k <= EMPIRICAL_SIZE_MAX_LGK {
+            return EMPIRICAL_MAX_SIZE_BYTES[lg_k - MIN_LG_K] + 
MAX_PREAMBLE_SIZE_BYTES;
+        }
+        let k = 1 << lg_k;
+        ((EMPIRICAL_MAX_SIZE_FACTOR * k as f64) as usize) + 
MAX_PREAMBLE_SIZE_BYTES
+    }
+}
+
+// testing methods
+impl CpcSketch {
+    /// Validate this sketch is valid.
+    ///
+    /// This is primarily for testing and validation purposes.
+    pub fn validate(&self) -> bool {
+        let bit_matrix = self.build_bit_matrix();
+        let num_bits_set = count_bits_set_in_matrix(&bit_matrix);
+        num_bits_set == self.num_coupons
+    }
+
+    /// Returns the number of coupons in the sketch.
+    ///
+    /// This is primarily for testing and validation purposes.
+    pub fn num_coupons(&self) -> u32 {
+        self.num_coupons
+    }
+}
diff --git a/datasketches/src/cpc/union.rs b/datasketches/src/cpc/union.rs
new file mode 100644
index 0000000..1a5255f
--- /dev/null
+++ b/datasketches/src/cpc/union.rs
@@ -0,0 +1,409 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The merging logic is somewhat involved, so it will be summarized here.
+//!
+//! First, we compare the K values of the union and the source sketch.
+//!
+//! If `source.K < union.K`, we reduce the union's K to match, which requires 
downsampling the
+//! union's internal sketch.
+//!
+//! Here is how to perform the downsampling:
+//!
+//! If the union contains a bitMatrix, downsample it by row-wise ORing.
+//!
+//! If the union contains a sparse sketch, then create a new empty sketch, and 
walk the old target
+//! sketch updating the new one (with modulo). At the end, check whether the 
new target sketch is
+//! still in sparse mode (it might not be, because downsampling densifies the 
set of collected
+//! coupons). If it is NOT in sparse mode, immediately convert it to a 
bitMatrix.
+//!
+//! At this point, we have `source.K >= union.K`. (We won't keep mentioning 
this, but in all the
+//! following the source's row indices are used mod union.K while updating the 
union's sketch.
+//! That takes care of the situation where `source.K < union.K`.)
+//!
+//! Case A: union is Sparse and source is Sparse. We walk the source sketch 
updating the union's
+//! sketch. At the end, if the union's sketch is no longer in sparse mode, we 
convert it to a
+//! bitMatrix.
+//!
+//! Case B: union is bitMatrix and source is Sparse. We walk the source 
sketch, setting bits in the
+//! bitMatrix.
+//!
+//! In the remaining cases, we have flavor(source) > Sparse, so we immediately 
convert the union's
+//! sketch to a bitMatrix (even if the union contains very few coupons). Then:
+//!
+//! Case C: union is bitMatrix and source is Hybrid or Pinned. Then we OR the 
source's sliding
+//! window into the bitMatrix, and walk the source's table, setting bits in 
the bitMatrix.
+//!
+//! Case D: union is bitMatrix, and source is Sliding. Then we convert the 
source into a bitMatrix,
+//! and OR it into the union's bitMatrix. (Important note; merely walking the 
source wouldn't work
+//! because of the partially inverted Logic in the Sliding flavor, where the 
presence of coupons
+//! is sometimes indicated by the ABSENCE of row_col pairs in the surprises 
table.)
+//!
+//! How does [`CpcUnion::get_result`] work?
+//!
+//! If the union has an Accumulator state, make a copy of that sketch.
+//!
+//! If the union has a BitMatrix state, then we have to convert the bitMatrix 
back into a sketch,
+//! which requires doing some extra work to figure out the values of 
num_coupons, offset,
+//! first_interesting_column, and kxp.
+
+use crate::cpc::CpcSketch;
+use crate::cpc::DEFAULT_LG_K;
+use crate::cpc::Flavor;
+use crate::cpc::count_bits_set_in_matrix;
+use crate::cpc::determine_correct_offset;
+use crate::cpc::pair_table::PairTable;
+use crate::hash::DEFAULT_UPDATE_SEED;
+
+/// The union (merge) operation for the CPC sketches.
+#[derive(Debug, Clone)]
+pub struct CpcUnion {
+    // immutable config variables
+    lg_k: u8,
+    seed: u64,
+
+    // union state
+    state: UnionState,
+}
+
+impl Default for CpcUnion {
+    fn default() -> Self {
+        Self::new(DEFAULT_LG_K)
+    }
+}
+
+impl CpcUnion {
+    /// Creates a new `CpcUnion` with the given `lg_k` and default seed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `lg_k` is not in the range `[4, 16]`.
+    pub fn new(lg_k: u8) -> Self {
+        Self::with_seed(lg_k, DEFAULT_UPDATE_SEED)
+    }
+
+    /// Creates a new `CpcUnion` with the given `lg_k` and `seed`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `lg_k` is not in the range `[4, 16]`.
+    pub fn with_seed(lg_k: u8, seed: u64) -> Self {
+        // We begin with the accumulator holding an EMPTY_MERGED sketch object.
+        let sketch = CpcSketch::with_seed(lg_k, seed);
+        let state = UnionState::Accumulator(sketch);
+        Self { lg_k, seed, state }
+    }
+
+    /// Return the parameter lg_k.
+    ///
+    /// Note that due to merging with source sketches that may have a lower 
value of lg_k, this
+    /// value can be less than what the union object was configured with.
+    pub fn lg_k(&self) -> u8 {
+        self.lg_k
+    }
+
+    /// Returns the result of union operations as a CPC sketch.
+    pub fn get_result(&self) -> CpcSketch {
+        match &self.state {
+            UnionState::Accumulator(sketch) => {
+                if sketch.is_empty() {
+                    CpcSketch::with_seed(self.lg_k, self.seed)
+                } else {
+                    let mut sketch = sketch.clone();
+                    assert_eq!(sketch.flavor(), Flavor::SPARSE);
+                    sketch.merge_flag = true;
+                    sketch
+                }
+            }
+            UnionState::BitMatrix(matrix) => {
+                let lg_k = self.lg_k;
+
+                let mut sketch = CpcSketch::with_seed(lg_k, self.seed);
+                let num_coupons = count_bits_set_in_matrix(matrix);
+                sketch.num_coupons = num_coupons;
+                let offset = determine_correct_offset(lg_k, num_coupons);
+                sketch.window_offset = offset;
+
+                // Build the window and pair table
+                let k = 1 << lg_k;
+                let mut sliding_window = vec![0u8; k];
+
+                // LgSize = K/16; in some cases this will end up being 
oversized
+                let new_table_lg_size = (lg_k - 4).max(2);
+                let mut table = PairTable::new(new_table_lg_size, 6 + lg_k);
+
+                // the following should work even when the offset is zero
+                let mask_for_clearing_window = (0xFFu64 << offset) ^ u64::MAX;
+                let mask_for_flipping_early_zone = (1u64 << offset) - 1;
+                let mut all_surprises_ored = 0;
+
+                // The snowplow effect was caused by processing the rows in 
order,
+                // but we have fixed it by using a sufficiently large hash 
table.
+                for i in 0..k {
+                    let mut pattern = matrix[i];
+                    sliding_window[i] = ((pattern >> offset) & 0xFF) as u8;
+                    pattern &= mask_for_clearing_window;
+                    pattern ^= mask_for_flipping_early_zone; // this flipping 
converts surprising 0's to 1's
+                    all_surprises_ored |= pattern;
+                    while pattern != 0 {
+                        let col = pattern.trailing_zeros();
+                        pattern ^= 1u64 << col; // erase the 1
+                        let row_col = ((i as u32) << 6) | col;
+                        let is_novel = table.maybe_insert(row_col);
+                        assert!(is_novel);
+                    }
+                }
+
+                // at this point we could shrink an oversized hash table, but 
the relative waste
+                // isn't very big
+                sketch.first_interesting_column = 
all_surprises_ored.trailing_zeros() as u8;
+                if sketch.first_interesting_column > offset {
+                    sketch.first_interesting_column = offset; // corner case
+                }
+
+                // HIP-related fields will contain zeros, and that is okay 
since merge_flag is true,
+                // and thus the HIP estimator will not be used.
+                sketch.sliding_window = sliding_window;
+                sketch.surprising_value_table = Some(table);
+                sketch.merge_flag = true;
+
+                sketch
+            }
+        }
+    }
+
+    /// Update this union with a CpcSketch.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the seed of the provided sketch does not match the seed of 
this union.
+    pub fn update(&mut self, sketch: &CpcSketch) {
+        assert_eq!(self.seed, sketch.seed());
+
+        let flavor = sketch.flavor();
+        if flavor == Flavor::EMPTY {
+            return;
+        }
+
+        if sketch.lg_k() < self.lg_k {
+            self.reduce_k(sketch.lg_k());
+        }
+
+        // if source is past SPARSE mode, make sure that union is a bitMatrix.
+        if flavor > Flavor::SPARSE {
+            if let UnionState::Accumulator(old_sketch) = &self.state {
+                // convert the accumulator to a bit matrix
+                let bit_matrix = old_sketch.build_bit_matrix();
+                self.state = UnionState::BitMatrix(bit_matrix);
+            }
+        }
+
+        match &mut self.state {
+            UnionState::Accumulator(old_sketch) => {
+                // [Case A] Sparse, bitMatrix == null, accumulator valid
+                if flavor == Flavor::SPARSE {
+                    let old_flavor = old_sketch.flavor();
+                    if old_flavor != Flavor::SPARSE && old_flavor != 
Flavor::EMPTY {
+                        unreachable!(
+                            "unexpected old flavor in union accumulator: {:?}",
+                            old_flavor
+                        );
+                    }
+
+                    // The following partially fixes the snowplow problem 
provided that the K's
+                    // are equal.
+                    if old_flavor == Flavor::EMPTY && self.lg_k == 
sketch.lg_k() {
+                        *old_sketch = sketch.clone();
+                        return;
+                    }
+
+                    walk_table_updating_sketch(old_sketch, 
sketch.surprising_value_table());
+                    let final_flavor = old_sketch.flavor();
+
+                    // if the accumulator has graduated beyond sparse, switch 
to a bit matrix
+                    // representation
+                    if final_flavor > Flavor::SPARSE {
+                        let bit_matrix = old_sketch.build_bit_matrix();
+                        self.state = UnionState::BitMatrix(bit_matrix);
+                    }
+
+                    return;
+                }
+
+                // If flavor is past SPARSE mode, the state must have been 
converted to bitMatrix.
+                // Otherwise, the EMPTY case was handled at the start of this 
method, and the
+                // SPARSE case was handled above.
+                unreachable!("unexpected flavor in union accumulator: {:?}", 
flavor);
+            }
+            UnionState::BitMatrix(old_matrix) => {
+                if flavor == Flavor::SPARSE {
+                    // [Case B] Sparse, bitMatrix valid, accumulator == null
+                    or_table_into_matrix(old_matrix, self.lg_k, 
sketch.surprising_value_table());
+                    return;
+                }
+
+                if matches!(flavor, Flavor::HYBRID | Flavor::PINNED) {
+                    // [Case C] Hybrid, bitMatrix valid, accumulator == null
+                    // [Case C] Pinned, bitMatrix valid, accumulator == null
+                    or_window_into_matrix(
+                        old_matrix,
+                        self.lg_k,
+                        &sketch.sliding_window,
+                        sketch.window_offset,
+                        sketch.lg_k(),
+                    );
+                    or_table_into_matrix(old_matrix, self.lg_k, 
sketch.surprising_value_table());
+                    return;
+                }
+
+                // [Case D] Sliding, bitMatrix valid, accumulator == null
+                // SLIDING mode involves inverted logic, so we cannot just 
walk the source sketch.
+                // Instead, we convert it to a bitMatrix that can be ORed into 
the destination.
+                assert_eq!(flavor, Flavor::SLIDING);
+                let src_matrix = sketch.build_bit_matrix();
+                or_matrix_into_matrix(old_matrix, self.lg_k, &src_matrix, 
sketch.lg_k());
+            }
+        }
+    }
+
+    fn reduce_k(&mut self, new_lg_k: u8) {
+        match &mut self.state {
+            UnionState::Accumulator(sketch) => {
+                if sketch.is_empty() {
+                    self.lg_k = new_lg_k;
+                    self.state = 
UnionState::Accumulator(CpcSketch::with_seed(new_lg_k, self.seed));
+                    return;
+                }
+
+                let mut new_sketch = CpcSketch::with_seed(new_lg_k, self.seed);
+                walk_table_updating_sketch(&mut new_sketch, 
sketch.surprising_value_table());
+
+                let final_new_flavor = new_sketch.flavor();
+                // SV table had to have something in it
+                assert_ne!(final_new_flavor, Flavor::EMPTY);
+                if final_new_flavor == Flavor::SPARSE {
+                    self.lg_k = new_lg_k;
+                    self.state = UnionState::Accumulator(new_sketch);
+                    return;
+                }
+
+                // the new sketch has graduated beyond sparse, so convert to 
bitMatrix
+                self.lg_k = new_lg_k;
+                self.state = 
UnionState::BitMatrix(new_sketch.build_bit_matrix());
+            }
+            UnionState::BitMatrix(matrix) => {
+                let new_k = 1 << new_lg_k;
+                let mut new_matrix = vec![0; new_k];
+                or_matrix_into_matrix(&mut new_matrix, new_lg_k, matrix, 
self.lg_k);
+                self.lg_k = new_lg_k;
+                self.state = UnionState::BitMatrix(new_matrix);
+            }
+        }
+    }
+}
+
+// testing methods
+impl CpcUnion {
+    /// Returns the number of coupons in the union.
+    ///
+    /// This is primarily for testing and validation purposes.
+    pub fn num_coupons(&self) -> u32 {
+        match &self.state {
+            UnionState::Accumulator(sketch) => sketch.num_coupons,
+            UnionState::BitMatrix(matrix) => count_bits_set_in_matrix(matrix),
+        }
+    }
+}
+
+fn or_window_into_matrix(
+    dst_matrix: &mut [u64],
+    dst_lg_k: u8,
+    src_window: &[u8],
+    src_offset: u8,
+    src_lg_k: u8,
+) {
+    assert!(dst_lg_k <= src_lg_k);
+    let dst_mask = (1 << dst_lg_k) - 1; // downsamples when dst_lg_k < src_lg_k
+    let src_k = 1 << src_lg_k;
+    for src_row in 0..src_k {
+        dst_matrix[src_row & dst_mask] |= (src_window[src_row] as u64) << 
src_offset;
+    }
+}
+
+fn or_table_into_matrix(dst_matrix: &mut [u64], dst_lg_k: u8, src_table: 
&PairTable) {
+    let dst_mask = (1 << dst_lg_k) - 1; // downsamples when dst_lg_k < src_lg_k
+    let slots = src_table.slots();
+    for &row_col in slots.iter() {
+        if row_col != u32::MAX {
+            let src_row = (row_col >> 6) as usize;
+            let src_col = (row_col & 63) as usize;
+            let dst_row = src_row & dst_mask;
+            dst_matrix[dst_row] |= 1u64 << src_col;
+        }
+    }
+}
+
+fn or_matrix_into_matrix(dst_matrix: &mut [u64], dst_lg_k: u8, src_matrix: 
&[u64], src_lg_k: u8) {
+    assert!(dst_lg_k <= src_lg_k);
+    let dst_mask = (1 << dst_lg_k) - 1; // downsamples when dst_lg_k < src_lg_k
+    let src_k = 1 << src_lg_k;
+    for src_row in 0..src_k {
+        let dst_row = src_row & dst_mask;
+        dst_matrix[dst_row] |= src_matrix[src_row];
+    }
+}
+
+fn walk_table_updating_sketch(sketch: &mut CpcSketch, table: &PairTable) {
+    assert!(sketch.lg_k() <= 26);
+
+    let slots = table.slots();
+    let num_slots = slots.len() as u32;
+
+    // downsamples when dst lgK < src LgK
+    let dst_mask = (((1u64 << sketch.lg_k()) - 1) << 6) | 63;
+    // Using a golden ratio stride fixes the snowplow effect.
+    let mut stride = (0.6180339887498949 * (num_slots as f64)) as u32;
+    assert!(stride >= 2);
+    if stride == ((stride >> 1) << 1) {
+        // force the stride to be odd
+        stride += 1;
+    }
+    assert!((stride >= 3) && (stride < num_slots));
+
+    let mut k = 0;
+    for _ in 0..num_slots {
+        k &= num_slots - 1;
+        let row_col = slots[k as usize];
+        if row_col != u32::MAX {
+            sketch.row_col_update(row_col & (dst_mask as u32));
+        }
+        k += stride;
+    }
+}
+
+/// The internal State of the Union operation.
+///
+/// At most one of BitMatrix and accumulator will be non-null at any given 
moment. Accumulator is a
+/// sketch object that is employed until it graduates out of Sparse mode.
+///
+/// At that point, it is converted into a full-sized bitMatrix, which is 
mathematically a sketch,
+/// but doesn't maintain any of the "extra" fields of our sketch objects.
+#[derive(Debug, Clone)]
+enum UnionState {
+    Accumulator(CpcSketch),
+    BitMatrix(Vec<u64>),
+}
diff --git a/datasketches/src/error.rs b/datasketches/src/error.rs
index dcb0592..7e8b663 100644
--- a/datasketches/src/error.rs
+++ b/datasketches/src/error.rs
@@ -89,7 +89,7 @@ impl Error {
     }
 }
 
-// Convenience constructors for deserialization errors
+// Convenient constructors used within datasketches crate.
 impl Error {
     pub(crate) fn invalid_argument(msg: impl Into<String>) -> Self {
         Self::new(ErrorKind::InvalidArgument, msg)
diff --git a/datasketches/src/hll/sketch.rs b/datasketches/src/hll/sketch.rs
index 8c79625..bc1ce42 100644
--- a/datasketches/src/hll/sketch.rs
+++ b/datasketches/src/hll/sketch.rs
@@ -40,7 +40,7 @@ use crate::hll::serialization::*;
 
 /// A HyperLogLog sketch.
 ///
-/// See the [hll module level documentation](crate::hll) for more.
+/// See the [module level documentation](super) for more.
 #[derive(Debug, Clone, PartialEq)]
 pub struct HllSketch {
     lg_config_k: u8,
diff --git a/datasketches/src/hll/union.rs b/datasketches/src/hll/union.rs
index 5dba0bb..870c8d8 100644
--- a/datasketches/src/hll/union.rs
+++ b/datasketches/src/hll/union.rs
@@ -45,7 +45,7 @@ use crate::hll::pack_coupon;
 /// the union of all input sketches. It automatically handles sketches with
 /// different configurations and modes.
 ///
-/// See the [hll module level documentation](crate::hll) for more.
+/// See the [module level documentation](super) for more.
 #[derive(Debug, Clone)]
 pub struct HllUnion {
     /// Maximum lg_k that this union can handle
diff --git a/datasketches/src/lib.rs b/datasketches/src/lib.rs
index 009fd9e..17701ab 100644
--- a/datasketches/src/lib.rs
+++ b/datasketches/src/lib.rs
@@ -33,6 +33,7 @@ compile_error!("datasketches does not support big-endian 
targets");
 pub mod bloom;
 pub mod common;
 pub mod countmin;
+pub mod cpc;
 pub mod error;
 pub mod frequencies;
 pub mod hll;
diff --git a/datasketches/src/tdigest/sketch.rs 
b/datasketches/src/tdigest/sketch.rs
index 91c90c7..a790892 100644
--- a/datasketches/src/tdigest/sketch.rs
+++ b/datasketches/src/tdigest/sketch.rs
@@ -22,7 +22,6 @@ use std::num::NonZeroU64;
 use crate::codec::SketchBytes;
 use crate::codec::SketchSlice;
 use crate::error::Error;
-use crate::error::ErrorKind;
 use crate::tdigest::serialization::*;
 
 /// The default value of K if one is not specified.
@@ -34,7 +33,7 @@ const DEFAULT_WEIGHT: NonZeroU64 = 
NonZeroU64::new(1).unwrap();
 
 /// T-Digest sketch for estimating quantiles and ranks.
 ///
-/// See the [tdigest module level documentation](crate::tdigest) for more.
+/// See the [module level documentation](super) for more.
 #[derive(Debug, Clone)]
 pub struct TDigestMut {
     k: u16,
@@ -100,10 +99,9 @@ impl TDigestMut {
     /// ```
     pub fn try_new(k: u16) -> Result<Self, Error> {
         if k < 10 {
-            return Err(Error::new(
-                ErrorKind::InvalidArgument,
-                format!("k must be at least 10, got {k}"),
-            ));
+            return Err(Error::invalid_argument(format!(
+                "k must be at least 10, got {k}"
+            )));
         }
 
         Ok(Self::make(
@@ -788,7 +786,7 @@ impl TDigestMut {
 
 /// Immutable (frozen) T-Digest sketch for estimating quantiles and ranks.
 ///
-/// See the [module documentation](super) for more details.
+/// See the [module level documentation](super) for more.
 pub struct TDigest {
     k: u16,
 
diff --git a/datasketches/src/theta/sketch.rs b/datasketches/src/theta/sketch.rs
index 506b2b6..07110d4 100644
--- a/datasketches/src/theta/sketch.rs
+++ b/datasketches/src/theta/sketch.rs
@@ -25,6 +25,7 @@ use std::hash::Hash;
 use crate::common::NumStdDev;
 use crate::common::ResizeFactor;
 use crate::common::binomial_bounds;
+use crate::common::canonical_double;
 use crate::hash::DEFAULT_UPDATE_SEED;
 use crate::theta::hash_table::DEFAULT_LG_K;
 use crate::theta::hash_table::MAX_LG_K;
@@ -52,7 +53,9 @@ impl ThetaSketch {
         ThetaSketchBuilder::default()
     }
 
-    /// Update the sketch with a hashable value
+    /// Update the sketch with a hashable value.
+    ///
+    /// For `f32`/`f64` values, use `update_f32`/`update_f64` instead.
     ///
     /// # Examples
     ///
@@ -69,7 +72,7 @@ impl ThetaSketch {
         }
     }
 
-    /// Update the sketch with a f64 value
+    /// Update the sketch with a f64 value.
     ///
     /// # Examples
     ///
@@ -85,7 +88,7 @@ impl ThetaSketch {
         self.update(canonical);
     }
 
-    /// Update the sketch with a f32 value
+    /// Update the sketch with a f32 value.
     ///
     /// # Examples
     ///
@@ -356,16 +359,3 @@ impl ThetaSketchBuilder {
         ThetaSketch { table }
     }
 }
-
-/// Canonicalize double value for compatibility with Java
-fn canonical_double(value: f64) -> i64 {
-    if value.is_nan() {
-        // Java's Double.doubleToLongBits() NaN value
-        0x7ff8000000000000i64
-    } else {
-        // -0.0 + 0.0 == +0.0 under IEEE754 roundTiesToEven rounding mode,
-        // which Rust guarantees. Thus, by adding a positive zero we
-        // canonicalize signed zero without any branches in one instruction.
-        (value + 0.0).to_bits() as i64
-    }
-}
diff --git a/datasketches/tests/cpc_union_test.rs 
b/datasketches/tests/cpc_union_test.rs
new file mode 100644
index 0000000..6e8aa32
--- /dev/null
+++ b/datasketches/tests/cpc_union_test.rs
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datasketches::cpc::CpcSketch;
+use datasketches::cpc::CpcUnion;
+use googletest::assert_that;
+use googletest::prelude::near;
+
+const RELATIVE_ERROR_FOR_LG_K_11: f64 = 0.02;
+
+#[test]
+fn test_empty() {
+    let union = CpcUnion::new(11);
+    let sketch = union.get_result();
+    assert!(sketch.is_empty());
+    assert_eq!(sketch.estimate(), 0.0);
+}
+
+#[test]
+fn test_two_values() {
+    let mut sketch = CpcSketch::new(11);
+    sketch.update(1);
+    let mut union = CpcUnion::new(11);
+    union.update(&sketch);
+
+    let result = union.get_result();
+    assert!(!result.is_empty());
+    assert_eq!(result.estimate(), 1.0);
+
+    sketch.update(2);
+    union.update(&sketch);
+    let result = union.get_result();
+    assert!(!result.is_empty());
+    assert_that!(
+        sketch.estimate(),
+        near(2.0, RELATIVE_ERROR_FOR_LG_K_11 * 2.0)
+    );
+}
+
+#[test]
+fn test_custom_seed() {
+    let mut sketch = CpcSketch::with_seed(11, 123);
+    sketch.update(1);
+    sketch.update(2);
+    sketch.update(3);
+
+    let mut union = CpcUnion::with_seed(11, 123);
+    union.update(&sketch);
+    let result = union.get_result();
+    assert!(!result.is_empty());
+    assert_that!(
+        result.estimate(),
+        near(3.0, RELATIVE_ERROR_FOR_LG_K_11 * 3.0)
+    );
+}
+
+#[test]
+#[should_panic]
+fn test_custom_seed_mismatch() {
+    let mut sketch = CpcSketch::with_seed(11, 123);
+    sketch.update(1);
+    sketch.update(2);
+    sketch.update(3);
+
+    let mut union = CpcUnion::with_seed(11, 234);
+    union.update(&sketch);
+}
+
+#[test]
+fn test_large_values() {
+    let mut key = 0;
+    let mut sketch = CpcSketch::new(11);
+    let mut union = CpcUnion::new(11);
+    for _ in 0..1000 {
+        let mut tmp = CpcSketch::new(11);
+        for _ in 0..10000 {
+            sketch.update(key);
+            tmp.update(key);
+            key += 1;
+        }
+        union.update(&tmp);
+    }
+    let result = union.get_result();
+    assert!(!result.is_empty());
+    assert_eq!(result.num_coupons(), union.num_coupons());
+    let estimate = sketch.estimate();
+    assert_that!(
+        result.estimate(),
+        near(estimate, RELATIVE_ERROR_FOR_LG_K_11 * estimate)
+    );
+}
+
+#[test]
+fn test_reduce_k_empty() {
+    let mut sketch = CpcSketch::new(11);
+    for i in 0..10000 {
+        sketch.update(i);
+    }
+    let mut union = CpcUnion::new(12);
+    union.update(&sketch);
+    let result = union.get_result();
+    assert_eq!(result.lg_k(), 11);
+    assert_that!(
+        result.estimate(),
+        near(10000.0, RELATIVE_ERROR_FOR_LG_K_11 * 10000.0)
+    );
+}
+
+#[test]
+fn test_reduce_k_sparse() {
+    let mut union = CpcUnion::new(12);
+
+    let mut sketch12 = CpcSketch::new(12);
+    for i in 0..100 {
+        sketch12.update(i);
+    }
+    union.update(&sketch12);
+
+    let mut sketch11 = CpcSketch::new(11);
+    for i in 0..1000 {
+        sketch11.update(i);
+    }
+    union.update(&sketch11);
+
+    let result = union.get_result();
+    assert_eq!(result.lg_k(), 11);
+    assert_that!(
+        result.estimate(),
+        near(1000.0, RELATIVE_ERROR_FOR_LG_K_11 * 10000.0)
+    );
+}
+
+#[test]
+fn test_reduce_k_window() {
+    let mut union = CpcUnion::new(12);
+
+    let mut sketch12 = CpcSketch::new(12);
+    for i in 0..500 {
+        sketch12.update(i);
+    }
+    union.update(&sketch12);
+
+    let mut sketch11 = CpcSketch::new(11);
+    for i in 0..1000 {
+        sketch11.update(i);
+    }
+    union.update(&sketch11);
+
+    let result = union.get_result();
+    assert_eq!(result.lg_k(), 11);
+    assert_that!(
+        result.estimate(),
+        near(1000.0, RELATIVE_ERROR_FOR_LG_K_11 * 10000.0)
+    );
+}
diff --git a/datasketches/tests/cpc_update_test.rs 
b/datasketches/tests/cpc_update_test.rs
new file mode 100644
index 0000000..7b814f7
--- /dev/null
+++ b/datasketches/tests/cpc_update_test.rs
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datasketches::common::NumStdDev;
+use datasketches::cpc::CpcSketch;
+use googletest::assert_that;
+use googletest::prelude::ge;
+use googletest::prelude::le;
+use googletest::prelude::near;
+
+const RELATIVE_ERROR_FOR_LG_K_11: f64 = 0.02;
+
+#[test]
+fn test_empty() {
+    let sketch = CpcSketch::new(11);
+    assert!(sketch.is_empty());
+    assert_eq!(sketch.estimate(), 0.0);
+    assert_eq!(sketch.lower_bound(NumStdDev::One), 0.0);
+    assert_eq!(sketch.upper_bound(NumStdDev::One), 0.0);
+    assert!(sketch.validate());
+}
+
+#[test]
+fn test_one_value() {
+    let mut sketch = CpcSketch::new(11);
+    sketch.update(1);
+    assert!(!sketch.is_empty());
+    assert_eq!(sketch.estimate(), 1.0);
+    assert_that!(sketch.estimate(), ge(sketch.lower_bound(NumStdDev::One)));
+    assert_that!(sketch.estimate(), le(sketch.upper_bound(NumStdDev::One)));
+    assert!(sketch.validate());
+}
+
+#[test]
+fn test_many_values() {
+    let mut sketch = CpcSketch::new(11);
+    for i in 0..10000 {
+        sketch.update(i);
+    }
+    assert!(!sketch.is_empty());
+    assert_that!(
+        sketch.estimate(),
+        near(10000.0, RELATIVE_ERROR_FOR_LG_K_11 * 10000.0)
+    );
+    assert_that!(sketch.estimate(), ge(sketch.lower_bound(NumStdDev::One)));
+    assert_that!(sketch.estimate(), le(sketch.upper_bound(NumStdDev::One)));
+    assert!(sketch.validate());
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datasketches-rust) branch main updated: feat: impl CpcSketch (#75)

Reply via email to