This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new b6240b32e2 Respect `PARQUET_TEST_DATA` in variant_interop test (#7747)
b6240b32e2 is described below
commit b6240b32e235d4ca330372e3be31f784ba133252
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jun 24 18:07:12 2025 -0400
Respect `PARQUET_TEST_DATA` in variant_interop test (#7747)
# Which issue does this PR close?
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.
- closes https://github.com/apache/arrow-rs/issues/7746
# Rationale for this change
The parquet-variant tests fail when run as part of
`verify-release-candidate.sh` because the `parquet-testing` directory
is checked out in a different location.
# What changes are included in this PR?
Update the test to also look at the `PARQUET_TEST_DATA` environment
variable.
# How are these changes tested?
I tested this manually:
```shell
# note this is a different name than the submodule:
git clone https://github.com/apache/parquet-testing.git parquet-testing-data
export PARQUET_TEST_DATA=$PWD/parquet-testing-data/data
# checkout my fork
git clone https://github.com/alamb/arrow-rs.git
cd arrow-rs
# This fails on main
git checkout main
cargo test -p parquet-variant
# PASSES on branch with fix
git checkout alamb/fix_variant_tests
cargo test -p parquet-variant
```
# Are there any user-facing changes?
No, this is a test-only change.
---
parquet-variant/tests/variant_interop.rs | 54 +++++++++++++++++++++++++++++---
1 file changed, 49 insertions(+), 5 deletions(-)
diff --git a/parquet-variant/tests/variant_interop.rs
b/parquet-variant/tests/variant_interop.rs
index be63357422..dc19d99737 100644
--- a/parquet-variant/tests/variant_interop.rs
+++ b/parquet-variant/tests/variant_interop.rs
@@ -18,21 +18,65 @@
//! End-to-end check: (almost) every sample from apache/parquet-testing/variant
//! can be parsed into our `Variant`.
-// NOTE: We keep this file separate rather than a test mod inside variant.rs
because it should be
-// moved to the test folder later
-use std::fs;
use std::path::{Path, PathBuf};
+use std::{env, fs};
use chrono::NaiveDate;
use parquet_variant::{
ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4,
VariantDecimal8,
};
+/// Returns a directory path for the parquet variant test data.
+///
+/// The data lives in the `parquet-testing` git repository:
+/// <https://github.com/apache/parquet-testing>
+///
+/// Normally this is checked out as a git submodule in the root of the
`arrow-rs` repository,
+/// so the relative path is
+/// * `CARGO_MANIFEST_DIR/../parquet-testing/variant`.
+///
+/// However, the user can override this by setting the environment variable
`PARQUET_TEST_DATA`
+/// to point to a different directory (as is done by the
`verify-release-candidate.sh` script).
+///
+/// In this case, the environment variable `PARQUET_TEST_DATA` is expected to
point to a directory
+/// `parquet-testing/data`, so the relative path to the `variant` subdirectory
is
+/// * `PARQUET_TEST_DATA/../variant`.
fn cases_dir() -> PathBuf {
- Path::new(env!("CARGO_MANIFEST_DIR"))
+ // which we expect to point at "../parquet-testing/data"
+ let env_name = "PARQUET_TEST_DATA";
+ if let Ok(dir) = env::var(env_name) {
+ let trimmed = dir.trim();
+ if !trimmed.is_empty() {
+ let pb = PathBuf::from(trimmed).join("..").join("variant");
+ if pb.is_dir() {
+ return pb;
+ } else {
+ panic!(
+ "Can't find variant data at `{pb:?}`. Used value of env
`{env_name}`../variant ",
+ )
+ }
+ }
+ }
+
+ // PARQUET_TEST_DATA is undefined or its value is trimmed to empty, let's
try default dir.
+
+ // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of
your package",
+ // set by `cargo run` or `cargo test`, see:
+ // https://doc.rust-lang.org/cargo/reference/environment-variables.html
+ let pb = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("parquet-testing")
- .join("variant")
+ .join("variant");
+
+ if pb.is_dir() {
+ pb
+ } else {
+ panic!(
+ "env `{env_name}` is undefined or has empty value, and \
+ `CARGO_MANIFEST_DIR/../parquet-testing/variant` is not a
directory: `{pb:?}`\n\
+ HINT: try running `git submodule update --init`",
+ )
+ }
}
struct Case {