This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new b6240b32e2 Respect `PARQUET_TEST_DATA` in variant_interop test (#7747)
b6240b32e2 is described below

commit b6240b32e235d4ca330372e3be31f784ba133252
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jun 24 18:07:12 2025 -0400

    Respect `PARQUET_TEST_DATA` in variant_interop test (#7747)
    
    # Which issue does this PR close?
    
    We generally require a GitHub issue to be filed for all bug fixes and
    enhancements and this helps us generate change logs for our releases.
    You can link an issue to this PR using the GitHub syntax.
    
    - closes https://github.com/apache/arrow-rs/issues/7746
    
    # Rationale for this change
    
    The parquet-variant tests fail when run as part of
    `verify-release-candidate.sh` because the `parquet-testing` directory
    is checked out in a different location than the tests expect.
    
    # What changes are included in this PR?
    
    Update the test to also honor the `PARQUET_TEST_DATA` environment
    variable, falling back to the git submodule path when the variable is
    unset or empty.
    
    # How are these changes tested?
    
    I tested this manually:
    ```shell
    # note this is a different name than the submodule:
    git clone https://github.com/apache/parquet-testing.git parquet-testing-data
    export PARQUET_TEST_DATA=$PWD/parquet-testing-data/data
    # checkout my fork
    git clone https://github.com/alamb/arrow-rs.git
    cd arrow-rs
    # This fails on main
    git checkout main
    cargo test -p parquet-variant
    # PASSES on branch with fix
    git checkout alamb/fix_variant_tests
    cargo test -p parquet-variant
    ```
    
    # Are there any user-facing changes?
    No, this is a test-only change.
---
 parquet-variant/tests/variant_interop.rs | 54 +++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/parquet-variant/tests/variant_interop.rs 
b/parquet-variant/tests/variant_interop.rs
index be63357422..dc19d99737 100644
--- a/parquet-variant/tests/variant_interop.rs
+++ b/parquet-variant/tests/variant_interop.rs
@@ -18,21 +18,65 @@
 //! End-to-end check: (almost) every sample from apache/parquet-testing/variant
 //! can be parsed into our `Variant`.
 
-// NOTE: We keep this file separate rather than a test mod inside variant.rs 
because it should be
-// moved to the test folder later
-use std::fs;
 use std::path::{Path, PathBuf};
+use std::{env, fs};
 
 use chrono::NaiveDate;
 use parquet_variant::{
     ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, 
VariantDecimal8,
 };
 
+/// Returns a directory path for the parquet variant test data.
+///
+/// The data lives in the `parquet-testing` git repository:
+/// <https://github.com/apache/parquet-testing>
+///
+/// Normally this is checked out as a git submodule in the root of the 
`arrow-rs` repository,
+/// so the relative path is
+/// * `CARGO_MANIFEST_DIR/../parquet-testing/variant`.
+///
+/// However, the user can override this by setting the environment variable 
`PARQUET_TEST_DATA`
+/// to point to a different directory (as is done by the 
`verify-release-candidate.sh` script).
+///
+/// In this case, the environment variable `PARQUET_TEST_DATA` is expected to 
point to a directory
+/// `parquet-testing/data`, so the relative path to the `variant` subdirectory 
is
+/// * `PARQUET_TEST_DATA/../variant`.
 fn cases_dir() -> PathBuf {
-    Path::new(env!("CARGO_MANIFEST_DIR"))
+    // which we expect to point at "../parquet-testing/data"
+    let env_name = "PARQUET_TEST_DATA";
+    if let Ok(dir) = env::var(env_name) {
+        let trimmed = dir.trim();
+        if !trimmed.is_empty() {
+            let pb = PathBuf::from(trimmed).join("..").join("variant");
+            if pb.is_dir() {
+                return pb;
+            } else {
+                panic!(
+                    "Can't find variant data at `{pb:?}`. Used value of env 
`{env_name}`../variant ",
+                )
+            }
+        }
+    }
+
+    // PARQUET_TEST_DATA is undefined or its value is trimmed to empty, let's 
try default dir.
+
+    // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of 
your package",
+    // set by `cargo run` or `cargo test`, see:
+    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
+    let pb = Path::new(env!("CARGO_MANIFEST_DIR"))
         .join("..")
         .join("parquet-testing")
-        .join("variant")
+        .join("variant");
+
+    if pb.is_dir() {
+        pb
+    } else {
+        panic!(
+            "env `{env_name}` is undefined or has empty value, and \
+             `CARGO_MANIFEST_DIR/../parquet-testing/variant` is not a 
directory: `{pb:?}`\n\
+             HINT: try running `git submodule update --init`",
+        )
+    }
 }
 
 struct Case {

Reply via email to