2010YOUY01 commented on code in PR #560:
URL: https://github.com/apache/sedona-db/pull/560#discussion_r2758196534
##########
rust/sedona-geoparquet/src/format.rs:
##########
@@ -222,38 +339,66 @@ impl FileFormat for GeoParquetFormat {
}
}
- if let Some(geo_metadata) = geoparquet_metadata {
- let new_fields: Result<Vec<_>> = inner_schema_without_metadata
- .fields()
- .iter()
- .map(|field| {
- if let Some(geo_column) =
geo_metadata.columns.get(field.name()) {
- match geo_column.encoding {
- GeoParquetColumnEncoding::WKB => {
- let extension = ExtensionType::new(
- "geoarrow.wkb",
- field.data_type().clone(),
- Some(geo_column.to_geoarrow_metadata()?),
- );
- Ok(Arc::new(
- extension.to_field(field.name(),
field.is_nullable()),
- ))
- }
- _ => plan_err!(
- "Unsupported GeoParquet encoding: {}",
- geo_column.encoding
- ),
+ // Geometry columns have been inferred from metadata, next combine
column
+ // metadata from options with the inferred ones
+ let mut inferred_geo_cols = match geoparquet_metadata {
+ Some(geo_metadata) => geo_metadata.columns,
+ None => HashMap::new(),
+ };
+
+ if let Some(geometry_columns) = &self.options.geometry_columns {
+ merge_geometry_columns(&mut inferred_geo_cols, geometry_columns)?;
+ }
+
+ if inferred_geo_cols.is_empty() {
+ return Ok(inner_schema_without_metadata);
+ }
+
+ let mut remaining: HashSet<String> =
inferred_geo_cols.keys().cloned().collect();
+ let new_fields: Result<Vec<_>> = inner_schema_without_metadata
+ .fields()
+ .iter()
+ .map(|field| {
+ if let Some(geo_column) = inferred_geo_cols.get(field.name()) {
+ remaining.remove(field.name());
+ let encoding = match geo_column.encoding {
+ Some(encoding) => encoding,
+ None => {
+ return plan_err!(
+ "GeoParquet column '{}' missing required field
'encoding'",
+ field.name()
+ )
}
- } else {
- Ok(field.clone())
+ };
+ match encoding {
+ GeoParquetColumnEncoding::WKB => {
+ let extension = ExtensionType::new(
+ "geoarrow.wkb",
+ field.data_type().clone(),
+ Some(geo_column.to_geoarrow_metadata()?),
+ );
+ Ok(Arc::new(
+ extension.to_field(field.name(),
field.is_nullable()),
+ ))
+ }
+ _ => plan_err!("Unsupported GeoParquet encoding: {}",
encoding),
}
- })
- .collect();
+ } else {
+ Ok(field.clone())
+ }
+ })
+ .collect();
- Ok(Arc::new(Schema::new(new_fields?)))
- } else {
- Ok(inner_schema_without_metadata)
+ if !remaining.is_empty() {
+ let mut missing: Vec<_> = remaining.into_iter().collect();
+ missing.sort();
+ return plan_err!(
+ "Geometry columns not found in schema: {}",
+ missing.join(", ")
+ );
Review Comment:
Added a test case to cover this in
[dd75822](https://github.com/apache/sedona-db/pull/560/commits/dd75822c1bf9e39c17e68ad3c2a70f8c19542acb)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]