This is an automated email from the ASF dual-hosted git repository.
scovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new d99043e3c3 [Variant] Enhance bracket access for VariantPath (#9479)
d99043e3c3 is described below
commit d99043e3c3a30f283cc2b3332770f8e65e8d9d8e
Author: Congxian Qiu <[email protected]>
AuthorDate: Tue Mar 3 05:49:08 2026 +0800
[Variant] Enhance bracket access for VariantPath (#9479)
# Which issue does this PR close?
- Closes #9478.
# What changes are included in this PR?
- Fix the typo
- Enhance the bracket access for the variant path
# Are these changes tested?
- Add some tests to cover the logic
# Are there any user-facing changes?
No
---
parquet-variant/src/path.rs | 33 +++++++++++++++++++++++++++++----
parquet-variant/src/utils.rs | 25 ++++++++++++++++++++-----
2 files changed, 49 insertions(+), 9 deletions(-)
diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs
index fe10d0451d..8e68d9efad 100644
--- a/parquet-variant/src/path.rs
+++ b/parquet-variant/src/path.rs
@@ -75,14 +75,15 @@ use std::{borrow::Cow, ops::Deref};
/// assert_eq!(path[1], VariantPathElement::field("bar"));
/// ```
///
-/// # Example: Accessing filed with bracket
+/// # Example: Accessing field with bracket
/// ```
/// # use parquet_variant::{VariantPath, VariantPathElement};
-/// let path = VariantPath::try_from("a[b.c].d[2]").unwrap();
+/// let path = VariantPath::try_from("a['b.c'].d[2]['3']").unwrap();
/// let expected = VariantPath::from_iter([VariantPathElement::field("a"),
/// VariantPathElement::field("b.c"),
/// VariantPathElement::field("d"),
-/// VariantPathElement::index(2)]);
+/// VariantPathElement::index(2),
+/// VariantPathElement::field("3")]);
/// assert_eq!(path, expected)
#[derive(Debug, Clone, PartialEq, Default)]
pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
@@ -287,11 +288,22 @@ mod tests {
assert_eq!(path, expected);
// a quoted name inside brackets is treated as a field
- let path = VariantPath::try_from("foo.bar[abc]").unwrap();
+ let path = VariantPath::try_from("foo.bar['abc'][\"def\"]").unwrap();
let expected = VariantPath::from_iter([
VariantPathElement::field("foo"),
VariantPathElement::field("bar"),
VariantPathElement::field("abc"),
+ VariantPathElement::field("def"),
+ ]);
+ assert_eq!(path, expected);
+
+ // a number quoted with `'` or `"` is treated as a field, not an index
+ let path = VariantPath::try_from("foo['0'].bar[\"1\"]").unwrap();
+ let expected = VariantPath::from_iter([
+ VariantPathElement::field("foo"),
+ VariantPathElement::field("0"),
+ VariantPathElement::field("bar"),
+ VariantPathElement::field("1"),
]);
assert_eq!(path, expected);
}
@@ -321,5 +333,18 @@ mod tests {
// No '[' before ']'
let err = VariantPath::try_from("foo.bar]baz").unwrap_err();
assert_eq!(err.to_string(), "Parser error: Unexpected ']' at byte 7");
+
+ // An unquoted token that is not a valid number is rejected
+ let err = VariantPath::try_from("foo.bar[123abc]").unwrap_err();
+ assert_eq!(
+ err.to_string(),
+ "Parser error: Invalid token in bracket request: `123abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)"
+ );
+
+ let err = VariantPath::try_from("foo.bar[abc]").unwrap_err();
+ assert_eq!(
+ err.to_string(),
+ "Parser error: Invalid token in bracket request: `abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)"
+ );
}
}
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
index 0984a601b2..85d79ed8ae 100644
--- a/parquet-variant/src/utils.rs
+++ b/parquet-variant/src/utils.rs
@@ -170,9 +170,10 @@ pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool {
/// - `"foo"` -> single field `foo`
/// - `"foo.bar"` -> nested fields `foo`, `bar`
/// - `"[1]"` -> array index 1
+/// - `"['1']"` or `"["1"]"` -> field `1`
/// - `"foo[1].bar"` -> field `foo`, index 1, field `bar`
-/// - `"[a.b]"` -> field `a.b` (dot is literal inside bracket)
-/// - `"[a\\]b]"` -> field `a]b` (escaped `]`
+/// - `"['a.b']"` -> field `a.b` (dot is literal inside bracket)
+/// - `"['a\]b']"` -> field `a]b` (escaped `]`)
/// - etc.
///
/// # Errors
@@ -267,9 +268,23 @@ fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, usize)
}
};
- let element = match unescaped.parse() {
- Ok(idx) => VariantPathElement::index(idx),
- Err(_) => VariantPathElement::field(unescaped),
+ let element = if let Some(inner) = unescaped
+ .strip_prefix('\'')
+ .and_then(|s| s.strip_suffix('\''))
+ .or_else(|| {
+ unescaped
+ .strip_prefix('"')
+ .and_then(|s| s.strip_suffix('"'))
+ }) {
+ // Quoted field name, e.g., ['field'] or ['123'] or ["123"]
+ VariantPathElement::field(inner.to_string())
+ } else {
+ let Ok(idx) = unescaped.parse() else {
+ return Err(ArrowError::ParseError(format!(
+ "Invalid token in bracket request: `{unescaped}`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)"
+ )));
+ };
+ VariantPathElement::index(idx)
};
Ok((element, end + 1))