This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 774fa42 build: generate snapshots on the fly (#29)
774fa42 is described below
commit 774fa42a2f5e0ff563ec6518b6ee47c39246c7f1
Author: tison <[email protected]>
AuthorDate: Fri Dec 19 13:04:37 2025 +0800
build: generate snapshots on the fly (#29)
* build: generate snapshots on the fly
Signed-off-by: tison <[email protected]>
* impl with docker
Signed-off-by: tison <[email protected]>
* use a python script
Signed-off-by: tison <[email protected]>
* for cpp snapshots
Signed-off-by: tison <[email protected]>
* tidy
Signed-off-by: tison <[email protected]>
* Add tool to generate serialization test data for Java and C++
* setup jdk 25
Signed-off-by: tison <[email protected]>
* Enhance snapshot generation script: Windows support and output capturing
* Stream command output to stdout for real-time CI logs
* fine tune
Signed-off-by: tison <[email protected]>
---------
Signed-off-by: tison <[email protected]>
---
.github/workflows/ci.yml | 8 +
.gitignore | 4 +
tests/.gitignore | 1 +
tests/hll_serialization_test.rs | 20 ++-
.../cpp_generated_files/hll4_n0_cpp.sk | Bin 8 -> 0 bytes
.../cpp_generated_files/hll4_n1000000_cpp.sk | Bin 2092 -> 0 bytes
.../cpp_generated_files/hll4_n100000_cpp.sk | Bin 2092 -> 0 bytes
.../cpp_generated_files/hll4_n10000_cpp.sk | Bin 2088 -> 0 bytes
.../cpp_generated_files/hll4_n1000_cpp.sk | Bin 2088 -> 0 bytes
.../cpp_generated_files/hll4_n100_cpp.sk | Bin 412 -> 0 bytes
.../cpp_generated_files/hll4_n10_cpp.sk | Bin 52 -> 0 bytes
.../cpp_generated_files/hll4_n1_cpp.sk | Bin 12 -> 0 bytes
.../cpp_generated_files/hll6_n0_cpp.sk | Bin 8 -> 0 bytes
.../cpp_generated_files/hll6_n1000000_cpp.sk | Bin 3113 -> 0 bytes
.../cpp_generated_files/hll6_n100000_cpp.sk | Bin 3113 -> 0 bytes
.../cpp_generated_files/hll6_n10000_cpp.sk | Bin 3113 -> 0 bytes
.../cpp_generated_files/hll6_n1000_cpp.sk | Bin 3113 -> 0 bytes
.../cpp_generated_files/hll6_n100_cpp.sk | Bin 412 -> 0 bytes
.../cpp_generated_files/hll6_n10_cpp.sk | Bin 52 -> 0 bytes
.../cpp_generated_files/hll6_n1_cpp.sk | 1 -
.../cpp_generated_files/hll8_n0_cpp.sk | Bin 8 -> 0 bytes
.../cpp_generated_files/hll8_n1000000_cpp.sk | Bin 4136 -> 0 bytes
.../cpp_generated_files/hll8_n100000_cpp.sk | Bin 4136 -> 0 bytes
.../cpp_generated_files/hll8_n10000_cpp.sk | Bin 4136 -> 0 bytes
.../cpp_generated_files/hll8_n1000_cpp.sk | Bin 4136 -> 0 bytes
.../cpp_generated_files/hll8_n100_cpp.sk | Bin 412 -> 0 bytes
.../cpp_generated_files/hll8_n10_cpp.sk | Bin 52 -> 0 bytes
.../cpp_generated_files/hll8_n1_cpp.sk | 1 -
.../java_generated_files/hll4_n0_java.sk | Bin 8 -> 0 bytes
.../java_generated_files/hll4_n1000000_java.sk | Bin 2092 -> 0 bytes
.../java_generated_files/hll4_n100000_java.sk | Bin 2092 -> 0 bytes
.../java_generated_files/hll4_n10000_java.sk | Bin 2088 -> 0 bytes
.../java_generated_files/hll4_n1000_java.sk | Bin 2088 -> 0 bytes
.../java_generated_files/hll4_n100_java.sk | Bin 412 -> 0 bytes
.../java_generated_files/hll4_n10_java.sk | Bin 52 -> 0 bytes
.../java_generated_files/hll4_n1_java.sk | Bin 12 -> 0 bytes
.../java_generated_files/hll6_n0_java.sk | Bin 8 -> 0 bytes
.../java_generated_files/hll6_n1000000_java.sk | Bin 3113 -> 0 bytes
.../java_generated_files/hll6_n100000_java.sk | Bin 3113 -> 0 bytes
.../java_generated_files/hll6_n10000_java.sk | Bin 3113 -> 0 bytes
.../java_generated_files/hll6_n1000_java.sk | Bin 3113 -> 0 bytes
.../java_generated_files/hll6_n100_java.sk | Bin 412 -> 0 bytes
.../java_generated_files/hll6_n10_java.sk | Bin 52 -> 0 bytes
.../java_generated_files/hll6_n1_java.sk | 1 -
.../java_generated_files/hll8_n0_java.sk | Bin 8 -> 0 bytes
.../java_generated_files/hll8_n1000000_java.sk | Bin 4136 -> 0 bytes
.../java_generated_files/hll8_n100000_java.sk | Bin 4136 -> 0 bytes
.../java_generated_files/hll8_n10000_java.sk | Bin 4136 -> 0 bytes
.../java_generated_files/hll8_n1000_java.sk | Bin 4136 -> 0 bytes
.../java_generated_files/hll8_n100_java.sk | Bin 412 -> 0 bytes
.../java_generated_files/hll8_n10_java.sk | Bin 52 -> 0 bytes
.../java_generated_files/hll8_n1_java.sk | 1 -
tools/generate_serialization_test_data.py | 188 +++++++++++++++++++++
53 files changed, 219 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 643e96e..6ce091d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -86,6 +86,14 @@ jobs:
run: |
rustup toolchain install ${{ matrix.rust-version }}
rustup default ${{ matrix.rust-version }}
+ - name: Setup Java
+ uses: actions/setup-java@v5
+ with:
+ java-version: '25'
+ distribution: 'temurin'
+ - name: Prepare test data
+ shell: bash
+ run: ./tools/generate_serialization_test_data.py
- name: Build
run: cargo build --workspace --all-features --bins --tests --examples --benches --lib
- name: Run unit tests
diff --git a/.gitignore b/.gitignore
index 36d5e44..3bd854a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,7 @@
# Build artifacts
**/target
+
+# Temporary files
+tmp_datasketches_java/
+tmp_datasketches_cpp/
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..2c1198d
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1 @@
+serialization_test_data
diff --git a/tests/hll_serialization_test.rs b/tests/hll_serialization_test.rs
index 23e9973..a3c397b 100644
--- a/tests/hll_serialization_test.rs
+++ b/tests/hll_serialization_test.rs
@@ -32,10 +32,26 @@ use datasketches::hll::HllSketch;
const TEST_DATA_DIR: &str = "tests/serialization_test_data";
fn get_test_data_path(sub_dir: &str, name: &str) -> PathBuf {
- PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+ let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join(TEST_DATA_DIR)
.join(sub_dir)
- .join(name)
+ .join(name);
+
+ if !path.exists() {
+ panic!(
+ r#"serialization test data file not found: {}
+
+ Please ensure test data files are present in the repository. Generally, you can
+ run the following commands from the project root to regenerate the test data files
+ if they are missing:
+
+ $ ./tools/generate_serialization_test_data.py
+ "#,
+ path.display(),
+ );
+ }
+
+ path
}
fn test_sketch_file(path: PathBuf, expected_cardinality: usize, expected_lg_k: u8) {
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n0_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n0_cpp.sk
deleted file mode 100644
index 074868c..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll4_n0_cpp.sk
and /dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n1000000_cpp.sk
deleted file mode 100644
index 9a8c626..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll4_n100000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n100000_cpp.sk
deleted file mode 100644
index 2b4b4dc..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll4_n100000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll4_n10000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n10000_cpp.sk
deleted file mode 100644
index e97b181..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll4_n10000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n1000_cpp.sk
deleted file mode 100644
index a6b5f9a..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n100_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n100_cpp.sk
deleted file mode 100644
index 0e12ff4..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll4_n100_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n10_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n10_cpp.sk
deleted file mode 100644
index e4ed655..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll4_n10_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n1_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll4_n1_cpp.sk
deleted file mode 100644
index 1f4f6ee..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll4_n1_cpp.sk
and /dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n0_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n0_cpp.sk
deleted file mode 100644
index c31bd26..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll6_n0_cpp.sk
and /dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n1000000_cpp.sk
deleted file mode 100644
index a51d1e0..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll6_n100000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n100000_cpp.sk
deleted file mode 100644
index aba532b..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll6_n100000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll6_n10000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n10000_cpp.sk
deleted file mode 100644
index cd49aad..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll6_n10000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n1000_cpp.sk
deleted file mode 100644
index e3d5f00..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n100_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n100_cpp.sk
deleted file mode 100644
index 71fc293..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll6_n100_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n10_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n10_cpp.sk
deleted file mode 100644
index 2a22d91..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll6_n10_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n1_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll6_n1_cpp.sk
deleted file mode 100644
index 3f2f545..0000000
--- a/tests/serialization_test_data/cpp_generated_files/hll6_n1_cpp.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n0_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n0_cpp.sk
deleted file mode 100644
index 5ecf871..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll8_n0_cpp.sk
and /dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n1000000_cpp.sk
deleted file mode 100644
index 67ec831..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll8_n100000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n100000_cpp.sk
deleted file mode 100644
index 0210961..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll8_n100000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll8_n10000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n10000_cpp.sk
deleted file mode 100644
index a5c8a15..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll8_n10000_cpp.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n1000_cpp.sk
deleted file mode 100644
index dd703ec..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n100_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n100_cpp.sk
deleted file mode 100644
index c2bfa48..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll8_n100_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n10_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n10_cpp.sk
deleted file mode 100644
index 86277b3..0000000
Binary files
a/tests/serialization_test_data/cpp_generated_files/hll8_n10_cpp.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n1_cpp.sk
b/tests/serialization_test_data/cpp_generated_files/hll8_n1_cpp.sk
deleted file mode 100644
index fd3a973..0000000
--- a/tests/serialization_test_data/cpp_generated_files/hll8_n1_cpp.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tests/serialization_test_data/java_generated_files/hll4_n0_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n0_java.sk
deleted file mode 100644
index 074868c..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n0_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll4_n1000000_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n1000000_java.sk
deleted file mode 100644
index 9a8c626..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n1000000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll4_n100000_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n100000_java.sk
deleted file mode 100644
index 2b4b4dc..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n100000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll4_n10000_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n10000_java.sk
deleted file mode 100644
index e97b181..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n10000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll4_n1000_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n1000_java.sk
deleted file mode 100644
index a6b5f9a..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n1000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll4_n100_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n100_java.sk
deleted file mode 100644
index 0e12ff4..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n100_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll4_n10_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n10_java.sk
deleted file mode 100644
index e4ed655..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n10_java.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll4_n1_java.sk
b/tests/serialization_test_data/java_generated_files/hll4_n1_java.sk
deleted file mode 100644
index 1f4f6ee..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll4_n1_java.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll6_n0_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n0_java.sk
deleted file mode 100644
index c31bd26..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll6_n0_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll6_n1000000_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n1000000_java.sk
deleted file mode 100644
index 42462d2..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll6_n1000000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll6_n100000_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n100000_java.sk
deleted file mode 100644
index 767f24f..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll6_n100000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll6_n10000_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n10000_java.sk
deleted file mode 100644
index 258da08..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll6_n10000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll6_n1000_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n1000_java.sk
deleted file mode 100644
index 274e8b7..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll6_n1000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll6_n100_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n100_java.sk
deleted file mode 100644
index 71fc293..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll6_n100_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll6_n10_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n10_java.sk
deleted file mode 100644
index 2a22d91..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll6_n10_java.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll6_n1_java.sk
b/tests/serialization_test_data/java_generated_files/hll6_n1_java.sk
deleted file mode 100644
index 3f2f545..0000000
--- a/tests/serialization_test_data/java_generated_files/hll6_n1_java.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tests/serialization_test_data/java_generated_files/hll8_n0_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n0_java.sk
deleted file mode 100644
index 5ecf871..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll8_n0_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll8_n1000000_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n1000000_java.sk
deleted file mode 100644
index e7f5f48..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll8_n1000000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll8_n100000_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n100000_java.sk
deleted file mode 100644
index c634e49..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll8_n100000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll8_n10000_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n10000_java.sk
deleted file mode 100644
index f8dfdb0..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll8_n10000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll8_n1000_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n1000_java.sk
deleted file mode 100644
index 761477d..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll8_n1000_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll8_n100_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n100_java.sk
deleted file mode 100644
index c2bfa48..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll8_n100_java.sk and
/dev/null differ
diff --git
a/tests/serialization_test_data/java_generated_files/hll8_n10_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n10_java.sk
deleted file mode 100644
index 86277b3..0000000
Binary files
a/tests/serialization_test_data/java_generated_files/hll8_n10_java.sk and
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll8_n1_java.sk
b/tests/serialization_test_data/java_generated_files/hll8_n1_java.sk
deleted file mode 100644
index fd3a973..0000000
--- a/tests/serialization_test_data/java_generated_files/hll8_n1_java.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tools/generate_serialization_test_data.py
b/tools/generate_serialization_test_data.py
new file mode 100755
index 0000000..512c02f
--- /dev/null
+++ b/tools/generate_serialization_test_data.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import subprocess
+import sys
+import shutil
+import argparse
+from pathlib import Path
+
+def check_command_installed(command):
+ """Checks if a command is available in the system path."""
+ if shutil.which(command) is None:
+ print(f"Error: '{command}' is not installed or not in PATH.")
+ sys.exit(1)
+
+
+def run_command(command, cwd=None, shell=False):
+ """Runs a shell command, streaming output to stdout/stderr."""
+ cmd_str = ' '.join(command) if isinstance(command, list) else command
+ print(f"Running: {cmd_str}")
+ sys.stdout.flush() # Ensure 'Running' message appears before command output
+ try:
+ # Don't capture output; let it stream to sys.stdout/sys.stderr
+ subprocess.check_call(command, cwd=cwd, stderr=subprocess.STDOUT, shell=shell)
+ except subprocess.CalledProcessError as e:
+ print(f"Error running command: {e}")
+ print("--- OUTPUT ---")
+ print(e.stdout)
+ print("--- END OUTPUT ---")
+ sys.exit(1)
+
+
+def generate_java_files(project_root):
+ print("--- Generating Java Test Data ---")
+
+ # 1. Check prerequisites
+ check_command_installed("git")
+ check_command_installed("java")
+ mvn_cmd_name = "mvn"
+ if os.name == 'nt':
+ mvn_cmd_name = "mvn.cmd"
+ check_command_installed(mvn_cmd_name)
+
+ # 2. Define paths
+ temp_dir = project_root / "tmp_datasketches_java"
+ output_dir = project_root / "tests" / "serialization_test_data" / "java_generated_files"
+
+ # 3. Setup temporary directory
+ if temp_dir.exists():
+ print(f"Removing existing temporary directory: {temp_dir}")
+ shutil.rmtree(temp_dir)
+
+ temp_dir.mkdir()
+
+ # 4. Clone repository
+ repo_url = "https://github.com/apache/datasketches-java.git"
+ run_command(["git", "clone", repo_url, str(temp_dir)])
+
+ # 5. Run Maven to generate files
+ mvn_cmd = ["mvn", "test", "-P", "generate-java-files"]
+ use_shell = False
+ if os.name == 'nt': # Windows
+ mvn_cmd[0] = "mvn.cmd"
+ use_shell = True
+
+ run_command(mvn_cmd, cwd=temp_dir, shell=use_shell)
+
+ # 6. Copy generated files
+ generated_files_dir = temp_dir / "serialization_test_data" / "java_generated_files"
+
+ if not generated_files_dir.exists():
+ print(f"Error: Expected generated files directory not found at {generated_files_dir}")
+ sys.exit(1)
+
+ print(f"Copying files from {generated_files_dir} to {output_dir}")
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ files_copied = 0
+ for file_path in generated_files_dir.glob("*.sk"):
+ shutil.copy2(file_path, output_dir)
+ print(f"Copied: {file_path.name}")
+ files_copied += 1
+
+ if files_copied == 0:
+ print("Warning: No .sk files were found to copy.")
+ else:
+ print(f"Successfully copied {files_copied} files.")
+
+
+def generate_cpp_files(project_root):
+ print("--- Generating C++ Test Data ---")
+
+ # 1. Check prerequisites
+ check_command_installed("git")
+ check_command_installed("cmake")
+ check_command_installed("ctest")
+
+ # 2. Define paths
+ temp_dir = project_root / "tmp_datasketches_cpp"
+ output_dir = project_root / "tests" / "serialization_test_data" / "cpp_generated_files"
+
+ # 3. Setup temporary directory
+ if temp_dir.exists():
+ print(f"Removing existing temporary directory: {temp_dir}")
+ shutil.rmtree(temp_dir)
+
+ temp_dir.mkdir()
+
+ # 4. Clone repository
+ repo_url = "https://github.com/apache/datasketches-cpp.git"
+ run_command(["git", "clone", repo_url, str(temp_dir)])
+
+ # 5. Build and Run CMake
+ build_dir = temp_dir / "build"
+ build_dir.mkdir(exist_ok=True)
+
+ # Configure: Add CMAKE_BUILD_TYPE for single-config generators (Ninja/Make)
+ run_command(["cmake", "..", "-DGENERATE=true", "-DCMAKE_BUILD_TYPE=Release"], cwd=build_dir)
+
+ # Build: Release config
+ run_command(["cmake", "--build", ".", "--config", "Release"], cwd=build_dir)
+
+ # Test: Use ctest which is more portable than 'cmake --target test' (VS uses RUN_TESTS)
+ # --output-on-failure helps debug if a specific test fails
+ run_command(["ctest", "-C", "Release", "--output-on-failure"], cwd=build_dir)
+
+ # 6. Copy generated files
+ # The instructions say: cp datasketches-cpp/build/*/test/*_cpp.sk serialization_test_data/cpp_generated_files
+ # We need to find where they are exactly.
+ # It seems they might be in build/test/ or subdirectories depending on generator.
+
+ print(f"Copying files to {output_dir}")
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ files_copied = 0
+ # Search recursively in build directory for *_cpp.sk
+ for file_path in build_dir.rglob("*_cpp.sk"):
+ # Avoid copying from CMakeFiles or other intermediate dirs if possible, but the pattern is specific enough
+ shutil.copy2(file_path, output_dir)
+ print(f"Copied: {file_path.name}")
+ files_copied += 1
+
+ if files_copied == 0:
+ print("Warning: No *_cpp.sk files were found to copy.")
+ else:
+ print(f"Successfully copied {files_copied} files.")
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Generate serialization test data for Java and/or C++.")
+ parser.add_argument("--java", action="store_true", help="Generate Java test data")
+ parser.add_argument("--cpp", action="store_true", help="Generate C++ test data")
+ parser.add_argument("--all", action="store_true", help="Generate both Java and C++ test data")
+
+ args = parser.parse_args()
+
+ # Default to all if no arguments provided
+ if not args.java and not args.cpp and not args.all:
+ args.all = True
+
+ script_dir = Path(__file__).resolve().parent
+ project_root = script_dir.parent
+
+ if args.java or args.all:
+ generate_java_files(project_root)
+
+ if args.cpp or args.all:
+ generate_cpp_files(project_root)
+
+if __name__ == "__main__":
+ main()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]