This is an automated email from the ASF dual-hosted git repository. imbruced pushed a commit to branch feature/geopackage-reader in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 6fe83e072fc98b4d67f6cb0669d79fc52caf9940 Author: pawelkocinski <[email protected]> AuthorDate: Mon Sep 23 23:00:58 2024 +0200 Add other missing data types. --- .../src/test/resources}/geopackage/example.gpkg | Bin .../src/test/resources/geopackage/features.gpkg | Bin 0 -> 29384704 bytes .../src/test/resources/geopackage/raster.gpkg | Bin 0 -> 9519104 bytes .../geopackage/GeoPackageDataSource.scala | 38 ++-- .../geopackage/GeoPackageLoadOptions.scala | 29 ++- .../geopackage/GeoPackageMetadataReader.scala | 22 ++- .../geopackage/GeoPackagePartitionReader.scala | 36 ++-- .../GeoPackagePartitionReaderFactory.scala | 38 ++-- .../datasources/geopackage/GeoPackageScan.scala | 35 +++- .../geopackage/GeoPackageScanBuilder.scala | 32 +++- .../datasources/geopackage/GeoPackageTable.scala | 39 ++-- .../connection/GeoPackageConnectionManager.scala | 55 ++++-- .../geopackage/errors/GeopackageException.scala | 25 +++ .../datasources/geopackage/model/Envelope.scala | 25 ++- .../geopackage/model/GeoPackageField.scala | 29 ++- .../geopackage/model/GeoPackageType.scala | 18 ++ .../geopackage/model/ImageFileFormat.scala | 18 ++ .../geopackage/model/PartitionOptions.scala | 43 +++-- .../datasources/geopackage/model/TableType.scala | 18 ++ .../datasources/geopackage/model/TileMatrix.scala | 27 ++- .../geopackage/model/TileMetadata.scala | 40 +++-- .../geopackage/model/TileRowMetadata.scala | 24 ++- .../transform/DataTypesTransformations.scala | 18 ++ .../geopackage/transform/GeometryReader.scala | 100 ++++++++--- .../datasources/geopackage/transform/Image.scala | 48 +++-- .../geopackage/transform/ValuesMapper.scala | 126 +++++++------ .../apache/sedona/sql/GeoPackageReaderTest.scala | 198 ++++++++++++++------- .../org/apache/sedona/sql/TestBaseScala.scala | 1 + 28 files changed, 770 insertions(+), 312 deletions(-) diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/example.gpkg b/spark/common/src/test/resources/geopackage/example.gpkg similarity index 100% rename from spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/example.gpkg rename to spark/common/src/test/resources/geopackage/example.gpkg diff --git a/spark/common/src/test/resources/geopackage/features.gpkg b/spark/common/src/test/resources/geopackage/features.gpkg new file mode 100644 index 000000000..4e7585277 Binary files /dev/null and b/spark/common/src/test/resources/geopackage/features.gpkg differ diff --git a/spark/common/src/test/resources/geopackage/raster.gpkg b/spark/common/src/test/resources/geopackage/raster.gpkg new file mode 100644 index 000000000..0c7cdedec Binary files /dev/null and b/spark/common/src/test/resources/geopackage/raster.gpkg differ diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageDataSource.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageDataSource.scala index 868bcc76d..1e0aedaaa 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageDataSource.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageDataSource.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage import org.apache.sedona.sql.datasources.geopackage.connection.GeoPackageConnectionManager @@ -9,7 +27,7 @@ import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.sources.DataSourceRegister import org.apache.spark.sql.util.CaseInsensitiveStringMap -class GeoPackageDataSource extends FileDataSourceV2 with DataSourceRegister{ +class GeoPackageDataSource extends FileDataSourceV2 with DataSourceRegister { override def fallbackFileFormat: Class[_ <: FileFormat] = { null @@ -25,8 +43,7 @@ class GeoPackageDataSource extends FileDataSourceV2 with DataSourceRegister{ Seq(loadOptions.path), None, fallbackFileFormat, - loadOptions - ) + loadOptions) } private def getLoadOptions(options: CaseInsensitiveStringMap): GeoPackageLoadOptions = { @@ -52,16 +69,11 @@ class GeoPackageDataSource extends FileDataSourceV2 with DataSourceRegister{ .getSchema(path, tableName) GeoPackageLoadOptions( - path=path, - showMetadata=showMetadata, - tableName=tableName, - tableType=getTableType( - showMetadata=showMetadata, - path=path, - tableName=tableName - ), - fields=fields - ) + path = path, + showMetadata = showMetadata, + tableName = tableName, + tableType = getTableType(showMetadata = showMetadata, path = path, tableName = tableName), + fields = fields) } private def getTableType(showMetadata: Boolean, path: String, tableName: String): TableType = { diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageLoadOptions.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageLoadOptions.scala index 2aaa0eb67..6e661bebf 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageLoadOptions.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageLoadOptions.scala @@ -1,12 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage import org.apache.sedona.sql.datasources.geopackage.model.GeoPackageField import org.apache.sedona.sql.datasources.geopackage.model.TableType.TableType case class GeoPackageLoadOptions( - path: String, - showMetadata: Boolean, - tableName: String, - tableType: TableType, - fields: Seq[GeoPackageField] -) + path: String, + showMetadata: Boolean, + tableName: String, + tableType: TableType, + fields: Seq[GeoPackageField]) diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageMetadataReader.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageMetadataReader.scala index fd1cc0bfc..84eb447a2 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageMetadataReader.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageMetadataReader.scala @@ -1,5 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage -class GeoPackageMetadataReader { - -} +class GeoPackageMetadataReader {} diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReader.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReader.scala index 79885730f..630162e5e 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReader.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReader.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage import org.apache.sedona.sql.datasources.geopackage.connection.GeoPackageConnectionManager @@ -10,17 +28,16 @@ import org.apache.spark.sql.connector.read.PartitionReader import java.sql.ResultSet case class GeoPackagePartitionReader( - var values: Seq[Any], - rs: ResultSet, - options: PartitionOptions -) extends PartitionReader[InternalRow] { + var values: Seq[Any], + rs: ResultSet, + options: PartitionOptions) + extends PartitionReader[InternalRow] { def this(partitionOptions: PartitionOptions) = { this( Seq.empty, GeoPackageConnectionManager.getTableCursor(partitionOptions), - partitionOptions - ) + partitionOptions) } override def next(): Boolean = { @@ -39,10 +56,9 @@ case class GeoPackagePartitionReader( case FEATURES | METADATA => options case TILES => val tileRowMetadata = TileRowMetadata( - zoomLevel=rs.getInt("zoom_level"), - tileColumn=rs.getInt("tile_column"), - tileRow=rs.getInt("tile_row"), - ) + zoomLevel = rs.getInt("zoom_level"), + tileColumn = rs.getInt("tile_column"), + tileRow = rs.getInt("tile_row")) options.withTileRowMetadata(tileRowMetadata) case UNKNOWN => options diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReaderFactory.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReaderFactory.scala index e960de496..d549ad17f 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReaderFactory.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackagePartitionReaderFactory.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage import org.apache.sedona.sql.datasources.geopackage.connection.GeoPackageConnectionManager @@ -5,30 +23,24 @@ import org.apache.sedona.sql.datasources.geopackage.model.{PartitionOptions, Tab import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} -class GeoPackagePartitionReaderFactory( - loadOptions: GeoPackageLoadOptions, - ) extends PartitionReaderFactory { +class GeoPackagePartitionReaderFactory(loadOptions: GeoPackageLoadOptions) + extends PartitionReaderFactory { override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { if (loadOptions.showMetadata) { - return new GeoPackagePartitionReader( - PartitionOptions.fromLoadOptions(loadOptions), - ) + return new GeoPackagePartitionReader(PartitionOptions.fromLoadOptions(loadOptions)) } loadOptions.tableType match { case TableType.FEATURES => - new GeoPackagePartitionReader( - PartitionOptions.fromLoadOptions(loadOptions), - ) + new GeoPackagePartitionReader(PartitionOptions.fromLoadOptions(loadOptions)) case TableType.TILES => - val tileMetadata = GeoPackageConnectionManager.findTilesMetadata( - loadOptions.path, loadOptions.tableName) + val tileMetadata = + GeoPackageConnectionManager.findTilesMetadata(loadOptions.path, loadOptions.tableName) new GeoPackagePartitionReader( - PartitionOptions.fromLoadOptions(loadOptions).withTile(tileMetadata), - ) + PartitionOptions.fromLoadOptions(loadOptions).withTile(tileMetadata)) } } } diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScan.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScan.scala index 47b34b443..289ad87e5 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScan.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScan.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage import org.apache.sedona.sql.datasources.geopackage.model.TableType.TableType @@ -8,15 +26,14 @@ import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.v2.FileScan import org.apache.spark.sql.types.StructType -case class GeoPackageScan ( - dataSchema: StructType, - sparkSession: SparkSession, - fileIndex: PartitioningAwareFileIndex, - readDataSchema: StructType, - readPartitionSchema: StructType, - loadOptions: GeoPackageLoadOptions, - ) - extends FileScan { +case class GeoPackageScan( + dataSchema: StructType, + sparkSession: SparkSession, + fileIndex: PartitioningAwareFileIndex, + readDataSchema: StructType, + readPartitionSchema: StructType, + loadOptions: GeoPackageLoadOptions) + extends FileScan { override def partitionFilters: Seq[Expression] = { Seq.empty diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScanBuilder.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScanBuilder.scala index 5609cb26a..ada2f91a6 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScanBuilder.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageScanBuilder.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage import org.apache.sedona.sql.datasources.geopackage.connection.GeoPackageConnectionManager @@ -9,11 +27,12 @@ import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap -class GeoPackageScanBuilder (sparkSession: SparkSession, - fileIndex: PartitioningAwareFileIndex, - dataSchema: StructType, - loadOptions: GeoPackageLoadOptions) - extends FileScanBuilder(sparkSession, fileIndex, dataSchema){ +class GeoPackageScanBuilder( + sparkSession: SparkSession, + fileIndex: PartitioningAwareFileIndex, + dataSchema: StructType, + loadOptions: GeoPackageLoadOptions) + extends FileScanBuilder(sparkSession, fileIndex, dataSchema) { override def build(): Scan = { GeoPackageScan( @@ -22,7 +41,6 @@ class GeoPackageScanBuilder (sparkSession: SparkSession, fileIndex, dataSchema, readPartitionSchema(), - loadOptions, - ) + loadOptions) } } diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageTable.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageTable.scala index 190a52bfe..8cb059b7b 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageTable.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/GeoPackageTable.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage import org.apache.hadoop.fs.FileStatus @@ -12,14 +30,14 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap case class GeoPackageTable( - name: String, - sparkSession: SparkSession, - options: CaseInsensitiveStringMap, - paths: Seq[String], - userSpecifiedSchema: Option[StructType], - fallbackFileFormat: Class[_ <: FileFormat], - loadOptions: GeoPackageLoadOptions) - extends FileTable(sparkSession, options, paths, userSpecifiedSchema) { + name: String, + sparkSession: SparkSession, + options: CaseInsensitiveStringMap, + paths: Seq[String], + userSpecifiedSchema: Option[StructType], + fallbackFileFormat: Class[_ <: FileFormat], + loadOptions: GeoPackageLoadOptions) + extends FileTable(sparkSession, options, paths, userSpecifiedSchema) { override def inferSchema(files: Seq[FileStatus]): Option[StructType] = { Some(getSchema) @@ -32,10 +50,7 @@ case class GeoPackageTable( override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { val schema = userSpecifiedSchema.getOrElse(getSchema) - new GeoPackageScanBuilder(sparkSession, fileIndex, - schema, - loadOptions - ) + new GeoPackageScanBuilder(sparkSession, fileIndex, schema, loadOptions) } override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/connection/GeoPackageConnectionManager.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/connection/GeoPackageConnectionManager.scala index 8a076fb6f..5f98be3e4 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/connection/GeoPackageConnectionManager.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/connection/GeoPackageConnectionManager.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.connection import org.apache.sedona.sql.datasources.geopackage.model.{GeoPackageField, PartitionOptions, TableType, TileMatrix, TileMetadata} @@ -19,7 +37,8 @@ object GeoPackageConnectionManager { } def getTableCursor(partitionOptions: PartitionOptions): ResultSet = { - val conn: Connection = DriverManager.getConnection("jdbc:sqlite:" + partitionOptions.loadOptions.path) + val conn: Connection = + DriverManager.getConnection("jdbc:sqlite:" + partitionOptions.loadOptions.path) val stmt: Statement = conn.createStatement() stmt.executeQuery(s"SELECT * FROM ${partitionOptions.loadOptions.tableName}") } @@ -39,9 +58,7 @@ object GeoPackageConnectionManager { } fields - } - - finally { + } finally { closeStatement(statement) } } @@ -49,7 +66,8 @@ object GeoPackageConnectionManager { def findFeatureMetadata(path: String, tableName: String): TableType = { val statement = createStatement(path) - val rs = statement.executeQuery(s"select * from gpkg_contents where table_name = '$tableName'") + val rs = + statement.executeQuery(s"select * from gpkg_contents where table_name = '$tableName'") rs.getString("data_type") match { case "features" => TableType.FEATURES @@ -60,7 +78,8 @@ object GeoPackageConnectionManager { def getZoomLevelData(path: String, tableName: String): mutable.HashMap[Int, TileMatrix] = { val stmt = createStatement(path) - val rs = stmt.executeQuery(f"select * from gpkg_tile_matrix where table_name = '${tableName}'") + val rs = + stmt.executeQuery(f"select * from gpkg_tile_matrix where table_name = '${tableName}'") val result: mutable.HashMap[Int, TileMatrix] = mutable.HashMap() while (rs.next()) { val zoom_level = rs.getInt("zoom_level") @@ -71,7 +90,14 @@ object GeoPackageConnectionManager { val pixel_x_size = rs.getDouble("pixel_x_size") val pixel_y_size = rs.getDouble("pixel_y_size") - result(zoom_level) = TileMatrix(zoom_level, matrix_width, matrix_height, tile_width, tile_height, pixel_x_size, pixel_y_size) + result(zoom_level) = TileMatrix( + zoom_level, + matrix_width, + matrix_height, + tile_width, + tile_height, + pixel_x_size, + pixel_y_size) } result @@ -80,7 +106,8 @@ object GeoPackageConnectionManager { def findTilesMetadata(path: String, tableName: String): TileMetadata = { val statement = createStatement(path) - val rs = statement.executeQuery(s"select * from gpkg_tile_matrix_set where table_name = '$tableName'") + val rs = statement.executeQuery( + s"select * from gpkg_tile_matrix_set where table_name = '$tableName'") val minX = rs.getDouble("min_x") val minY = rs.getDouble("min_y") @@ -88,10 +115,7 @@ object GeoPackageConnectionManager { val maxY = rs.getDouble("max_y") val srsID = rs.getInt("srs_id") - val getZoomLevelData = GeoPackageConnectionManager.getZoomLevelData( - path, - tableName - ) + val getZoomLevelData = GeoPackageConnectionManager.getZoomLevelData(path, tableName) TileMetadata( tableName = tableName, @@ -99,11 +123,8 @@ object GeoPackageConnectionManager { minY = minY, maxX = maxX, maxY = maxY, - srsID=srsID, + srsID = srsID, zoomLevelMetadata = getZoomLevelData, - tileRowMetadata = null - ) + tileRowMetadata = null) } } - - diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/errors/GeopackageException.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/errors/GeopackageException.scala new file mode 100644 index 000000000..ae89baca7 --- /dev/null +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/errors/GeopackageException.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.sql.datasources.geopackage.errors + +class GeopackageException extends Exception { + def this(message: String) { + this() + } +} diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/Envelope.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/Envelope.scala index 1320fec8d..566e76ca8 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/Envelope.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/Envelope.scala @@ -1,8 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model -case class Envelope( - minX: Double, - minY: Double, - maxX: Double, - maxY: Double -) +case class Envelope(minX: Double, minY: Double, maxX: Double, maxY: Double) diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageField.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageField.scala index 9bac47182..127c56ca5 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageField.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageField.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model import org.apache.sedona.sql.datasources.geopackage.model.TableType.TableType @@ -11,7 +29,8 @@ case class GeoPackageField(name: String, dataType: String, isNullable: Boolean) def toStructField(tableType: TableType): StructField = { dataType match { - case startsWith: String if startsWith.startsWith(GeoPackageType.TEXT) => StructField(name, StringType) + case startsWith: String if startsWith.startsWith(GeoPackageType.TEXT) => + StructField(name, StringType) case startsWith: String if startsWith.startsWith(GeoPackageType.BLOB) => { if (tableType == TableType.TILES) { return StructField(name, RasterUDT) @@ -19,12 +38,8 @@ case class GeoPackageField(name: String, dataType: String, isNullable: Boolean) StructField(name, BinaryType) } - case - GeoPackageType.INTEGER | - GeoPackageType.INT | - GeoPackageType.SMALLINT | - GeoPackageType.TINY_INT | - GeoPackageType.MEDIUMINT => + case GeoPackageType.INTEGER | GeoPackageType.INT | GeoPackageType.SMALLINT | + GeoPackageType.TINY_INT | GeoPackageType.MEDIUMINT => StructField(name, IntegerType) case GeoPackageType.POINT => StructField(name, GeometryUDT) case GeoPackageType.LINESTRING => StructField(name, GeometryUDT) diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageType.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageType.scala index 8ae862cb9..e880f0618 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageType.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/GeoPackageType.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model object GeoPackageType { diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/ImageFileFormat.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/ImageFileFormat.scala index 99798a8a2..fa68afc96 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/ImageFileFormat.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/ImageFileFormat.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model object ImageFileFormat extends Enumeration { diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/PartitionOptions.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/PartitionOptions.scala index dc5785a34..2a9d18082 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/PartitionOptions.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/PartitionOptions.scala @@ -1,35 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model import org.apache.sedona.sql.datasources.geopackage.GeoPackageLoadOptions case class PartitionOptions( - loadOptions: GeoPackageLoadOptions, - columns: Seq[GeoPackageField], - tile: Option[TileMetadata], -) { + loadOptions: GeoPackageLoadOptions, + columns: Seq[GeoPackageField], + tile: Option[TileMetadata]) { def withTile(tile: TileMetadata): PartitionOptions = { - PartitionOptions( - loadOptions, - columns, - Some(tile) - ) + PartitionOptions(loadOptions, columns, Some(tile)) } def withTileRowMetadata(tileRowMetadata: TileRowMetadata): PartitionOptions = { - PartitionOptions( - loadOptions, - columns, - tile.map(_.withTileRowMetadata(tileRowMetadata)) - ) + PartitionOptions(loadOptions, columns, tile.map(_.withTileRowMetadata(tileRowMetadata))) } } object PartitionOptions { def fromLoadOptions(loadOptions: GeoPackageLoadOptions): PartitionOptions = { - PartitionOptions( - loadOptions, - loadOptions.fields, - None - ) + PartitionOptions(loadOptions, loadOptions.fields, None) } } diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TableType.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TableType.scala index df66f15fb..774419232 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TableType.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TableType.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model object TableType extends Enumeration { diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMatrix.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMatrix.scala index d0807d4cc..a947b1a78 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMatrix.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMatrix.scala @@ -1,3 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model -case class TileMatrix(zoom_level: Int, matrix_width: Int, matrix_height: Int, tile_width: Int, tile_height: Int, pixel_x_size: Double, pixel_y_size: Double) +case class TileMatrix( + zoom_level: Int, + matrix_width: Int, + matrix_height: Int, + tile_width: Int, + tile_height: Int, + pixel_x_size: Double, + pixel_y_size: Double) diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMetadata.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMetadata.scala index 1a048089f..eb0856989 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMetadata.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileMetadata.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model import org.geotools.referencing.CRS @@ -6,15 +24,14 @@ import org.opengis.referencing.crs.CoordinateReferenceSystem import scala.collection.mutable case class TileMetadata( - tableName: String, - minX: Double, - minY: Double, - maxX: Double, - maxY: Double, - srsID: Int, - zoomLevelMetadata: mutable.HashMap[Int, TileMatrix], - tileRowMetadata: Option[TileRowMetadata] - ) { + tableName: String, + minX: Double, + minY: Double, + maxX: Double, + maxY: Double, + srsID: Int, + zoomLevelMetadata: mutable.HashMap[Int, TileMatrix], + tileRowMetadata: Option[TileRowMetadata]) { def withTileRowMetadata(tileRowMetadata: TileRowMetadata): TileMetadata = { TileMetadata( @@ -25,8 +42,7 @@ case class TileMetadata( maxY, srsID, zoomLevelMetadata, - Some(tileRowMetadata) - ) + Some(tileRowMetadata)) } def getSRID(): CoordinateReferenceSystem = { CRS.decode("EPSG:" + srsID) @@ -41,7 +57,6 @@ case class TileMetadata( val dify = (this.maxY - this.minY) - val minX = this.minX + (columnNumber * (this.maxX - this.minX) / numberOfColumns) val maxX = this.minX + ((columnNumber + 1) * (this.maxX - this.minX) / numberOfColumns) val minY = this.minY + ((numberOfRows - 1 - rowNumber) / numberOfRows.toDouble * dify) @@ -50,4 +65,3 @@ case class TileMetadata( Envelope(minX, minY, maxX, maxY) } } - diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileRowMetadata.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileRowMetadata.scala index 4803d0182..e3fcb4012 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileRowMetadata.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/model/TileRowMetadata.scala @@ -1,7 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.model -case class TileRowMetadata( - tileColumn: Int, - tileRow: Int, - zoomLevel: Int, -) +case class TileRowMetadata(tileColumn: Int, tileRow: Int, zoomLevel: Int) diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala index 4bd57423b..9a23f0a08 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.transform import java.time.{Instant, LocalDate} diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/GeometryReader.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/GeometryReader.scala index 40aeec20a..55c950dac 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/GeometryReader.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/GeometryReader.scala @@ -1,47 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.transform +import org.apache.sedona.sql.datasources.geopackage.errors.GeopackageException import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT -import java.nio.ByteBuffer +import java.nio.{ByteBuffer, ByteOrder} object GeometryReader { def extractWKB(gpkgGeom: Array[Byte]): Array[Byte] = { - val buffer = ByteBuffer.wrap(gpkgGeom) + val reader = ByteBuffer.wrap(gpkgGeom) val magic = new Array[Byte](2) - buffer.get(magic) + reader.get(magic) if (magic(0) != 71 || magic(1) != 80) { - throw new Exception("Invalid GeoPackage geometry magic number") + throw new GeopackageException("Invalid GeoPackage geometry magic number") } // Read version and flags - val version = buffer.get() - val flags = buffer.get() - - // Parse flags - val envelopeIndicator = (flags >> 1) & 0x07 - val littleEndian = (flags & 0x01) == 1 - - // Determine envelope size - val envelopeSize = envelopeIndicator match { - case 0 => 0 - case 1 | 3 => 32 - case 2 => 48 - case 4 => 64 - case _ => throw new Exception("Invalid envelope indicator in GeoPackage geometry header") - } + val versionByte = Array.ofDim[Byte](1) + reader.get(versionByte) + + val flagsByte = Array.ofDim[Byte](1) + reader.get(flagsByte) + + val resolvedFlags = readFlags(flagsByte(0)) - // read the srs id - val srsId = buffer.getInt() + reader.order(resolvedFlags._2) - buffer.get(envelopeSize) + // skip srid for now + reader.getInt() - // Skip the envelope - // Extract WKB (we need to migrate to EWKB) - val wkb = new Array[Byte](buffer.remaining()) - buffer.get(wkb) + skipEnvelope(resolvedFlags._1, reader) + + val wkb = new Array[Byte](reader.remaining()) + reader.get(wkb) val wkbReader = new org.locationtech.jts.io.WKBReader() val geom = wkbReader.read(wkb) @@ -50,4 +61,41 @@ object GeometryReader { GeometryUDT.serialize(geom) } + def skipEnvelope(value: Any, buffer: ByteBuffer): Any = { + value match { + case 0 => null + case 1 | 3 => buffer.get(new Array[Byte](32)) + case 2 => buffer.get(new Array[Byte](48)) + case 4 => buffer.get(new Array[Byte](64)) + case _ => + throw new GeopackageException( + "Unexpected GeoPackage Geometry flags. " + + "Envelope contents indicator must be between 0 and 4. Actual: " + value) + } + } + + private def readFlags(flags: Byte): (Int, ByteOrder, Boolean) = { + val reserved7 = (flags >> 7) & 1 + val reserved6 = (flags >> 6) & 1 + if (reserved7 != 0 || reserved6 != 0) + throw new GeopackageException( + "Unexpected GeoPackage Geometry flags. " + + "Flag bit 7 and 6 should both be 0, 7=" + reserved7 + ", 6=" + reserved6) + val binaryType = (flags >> 5) & 1 + val extended = binaryType == 1 + val emptyValue = (flags >> 4) & 1 + val empty = emptyValue == 1 + val envelopeIndicator = (flags >> 1) & 7 + if (envelopeIndicator > 4) + throw new GeopackageException( + "Unexpected GeoPackage Geometry flags. " + + "Envelope contents indicator must be between 0 and 4. Actual: " + envelopeIndicator) + val byteOrderValue = flags & 1 + val byteOrder = + if (byteOrderValue == 0) ByteOrder.BIG_ENDIAN + else ByteOrder.LITTLE_ENDIAN + + (envelopeIndicator, byteOrder, extended) + } + } diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/Image.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/Image.scala index d913dd666..e8123c79d 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/Image.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/Image.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.transform import org.apache.sedona.sql.datasources.geopackage.model.ImageFileFormat.ImageFileFormat @@ -13,20 +31,15 @@ import javax.imageio.ImageIO object Image { def readImageFile( - byteArray: Array[Byte], - tileMetadata: TileMetadata, - tileRowMetadata: TileRowMetadata - ): GridCoverage2D = { + byteArray: Array[Byte], + tileMetadata: TileMetadata, + tileRowMetadata: TileRowMetadata): GridCoverage2D = { val format = detectFileFormat(byteArray) format match { case ImageFileFormat.JPEG | ImageFileFormat.PNG => val image = readImageFromBinary(byteArray) - val gridCoverage = createGridCoverage2D( - image, - tileMetadata, - tileRowMetadata - ) + val gridCoverage = createGridCoverage2D(image, tileMetadata, tileRowMetadata) gridCoverage case ImageFileFormat.WEBP => throw new UnsupportedOperationException("WebP format is not supported") @@ -53,7 +66,8 @@ object Image { return ImageFileFormat.PNG } - if (magicHex.startsWith("FFD8FFE0") || magicHex.startsWith("FFD8FFE1") || magicHex.startsWith("FFD8FFE8")) { + if (magicHex.startsWith("FFD8FFE0") || magicHex.startsWith("FFD8FFE1") || magicHex.startsWith( + "FFD8FFE8")) { return ImageFileFormat.JPEG } @@ -69,18 +83,14 @@ object Image { } def createGridCoverage2D( - image: BufferedImage, - tileMetadata: TileMetadata, - tileRowMetadata: TileRowMetadata - ): GridCoverage2D = { - val envelope = tileMetadata.getEnvelope( - tileRowMetadata - ) + image: BufferedImage, + tileMetadata: TileMetadata, + tileRowMetadata: TileRowMetadata): GridCoverage2D = { + val envelope = tileMetadata.getEnvelope(tileRowMetadata) val genevelope = new GeneralEnvelope( Array(envelope.minX, envelope.minY), - Array(envelope.maxX, envelope.maxY) - ) + Array(envelope.maxX, envelope.maxY)) genevelope.setCoordinateReferenceSystem(tileMetadata.getSRID()) val coverageFactory = new GridCoverageFactory() diff --git a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/ValuesMapper.scala b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/ValuesMapper.scala index 03b2be0a3..f53482864 100644 --- a/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/ValuesMapper.scala +++ b/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/ValuesMapper.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql.datasources.geopackage.transform import org.apache.sedona.sql.datasources.geopackage.model.{GeoPackageType, PartitionOptions, TileRowMetadata} @@ -6,63 +24,57 @@ import org.apache.spark.sql.sedona_sql.UDT.RasterUDT import org.apache.spark.unsafe.types.UTF8String object ValuesMapper { - def mapValues( - metadata: PartitionOptions, - rs: java.sql.ResultSet): Seq[Any] = { - metadata.columns.map( - column => { - (column.dataType, metadata.loadOptions.tableType) match { - case (GeoPackageType.INTEGER | GeoPackageType.INT, _) => rs.getInt(column.name) - case (GeoPackageType.TINY_INT, _) => rs.getInt(column.name) - case (GeoPackageType.SMALLINT, _) => rs.getInt(column.name) - case (GeoPackageType.MEDIUMINT, _) => rs.getInt(column.name) - case (GeoPackageType.FLOAT, _) => rs.getFloat(column.name) - case (GeoPackageType.DOUBLE, _) => rs.getDouble(column.name) - case (GeoPackageType.REAL, _) => rs.getDouble(column.name) - case (startsWith: String, _) if startsWith.startsWith(GeoPackageType.TEXT) => - UTF8String.fromString(rs.getString(column.name)) - case (startsWith: String, TILES) if startsWith.startsWith(GeoPackageType.BLOB) && column.name == "tile_data" => - metadata.tile match { - case Some(value) => - value.tileRowMetadata.map( - tileRowMetadata => { - RasterUDT.serialize(Image.readImageFile( - rs.getBytes(column.name), - value, - tileRowMetadata - )) - } - ).orNull - case None => null - } - case (startsWith: String, _) if startsWith.startsWith(GeoPackageType.BLOB) => - rs.getBytes(column.name) - case (GeoPackageType.BOOLEAN, _) => - rs.getBoolean(column.name) - case (GeoPackageType.DATE, _) => - DataTypesTransformations.getDays(rs.getString(column.name)) - case (GeoPackageType.DATETIME, _) => - DataTypesTransformations.epoch(rs.getString(column.name)) * 1000 - case (GeoPackageType.POINT, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case (GeoPackageType.LINESTRING, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case (GeoPackageType.POLYGON, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case (GeoPackageType.GEOMETRY, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case (GeoPackageType.MULTIPOINT, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case (GeoPackageType.MULTILINESTRING, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case (GeoPackageType.MULTIPOLYGON, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case (GeoPackageType.GEOMETRYCOLLECTION, _) => - GeometryReader.extractWKB(rs.getBytes(column.name)) - case _ => - UTF8String.fromString(rs.getString(column.name)) - } + def mapValues(metadata: PartitionOptions, rs: java.sql.ResultSet): Seq[Any] = { + metadata.columns.map(column => { + (column.dataType, metadata.loadOptions.tableType) match { + case (GeoPackageType.INTEGER | GeoPackageType.INT, _) => rs.getInt(column.name) + case (GeoPackageType.TINY_INT, _) => rs.getInt(column.name) + case (GeoPackageType.SMALLINT, _) => rs.getInt(column.name) + case (GeoPackageType.MEDIUMINT, _) => rs.getInt(column.name) + case (GeoPackageType.FLOAT, _) => rs.getFloat(column.name) + case (GeoPackageType.DOUBLE, _) => rs.getDouble(column.name) + case (GeoPackageType.REAL, _) => rs.getDouble(column.name) + case (startsWith: String, _) if startsWith.startsWith(GeoPackageType.TEXT) => + UTF8String.fromString(rs.getString(column.name)) + case (startsWith: String, TILES) + if startsWith.startsWith(GeoPackageType.BLOB) && column.name == "tile_data" => + metadata.tile match { + case Some(value) => + value.tileRowMetadata + .map(tileRowMetadata => { + RasterUDT.serialize( + Image.readImageFile(rs.getBytes(column.name), value, tileRowMetadata)) + }) + .orNull + case None => null + } + case (startsWith: String, _) if startsWith.startsWith(GeoPackageType.BLOB) => + rs.getBytes(column.name) + case (GeoPackageType.BOOLEAN, _) => + rs.getBoolean(column.name) + case (GeoPackageType.DATE, _) => + DataTypesTransformations.getDays(rs.getString(column.name)) + case (GeoPackageType.DATETIME, _) => + DataTypesTransformations.epoch(rs.getString(column.name)) * 1000 + case (GeoPackageType.POINT, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case (GeoPackageType.LINESTRING, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case (GeoPackageType.POLYGON, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case (GeoPackageType.GEOMETRY, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case (GeoPackageType.MULTIPOINT, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case (GeoPackageType.MULTILINESTRING, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case (GeoPackageType.MULTIPOLYGON, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case (GeoPackageType.GEOMETRYCOLLECTION, _) => + GeometryReader.extractWKB(rs.getBytes(column.name)) + case _ => + UTF8String.fromString(rs.getString(column.name)) } - ) + }) } } diff --git a/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala b/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala index acccfdd80..45a004844 100644 --- a/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala +++ b/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.sedona.sql import org.apache.spark.sql.DataFrame @@ -7,28 +25,29 @@ import org.scalatest.matchers.should.Matchers import org.scalatest.prop.TableDrivenPropertyChecks._ import java.sql.{Date, Timestamp} - class GeoPackageReaderTest extends TestBaseScala with Matchers { import sparkSession.implicits._ - val path = "/Users/pawelkocinski/Desktop/projects/sedona/spark/spark-3.3/src/main/scala/org/apache/sedona/sql/datasources/geopackage/example.gpkg" + val path: String = resourceFolder + "geopackage/example.gpkg" + val polygonsPath: String = resourceFolder + "geopackage/features.gpkg" + val rasterPath: String = resourceFolder + "geopackage/raster.gpkg" val wktReader = new org.locationtech.jts.io.WKTReader() val wktWriter = new org.locationtech.jts.io.WKTWriter() - val expectedFeatureSchema = StructType(Seq( - StructField("id", IntegerType, true), - StructField("geometry", GeometryUDT, true), - StructField("text", StringType, true), - StructField("real", DoubleType, true), - StructField("boolean", BooleanType, true), - StructField("blob", BinaryType, true), - StructField("integer", IntegerType, true), - StructField("text_limited", StringType, true), - StructField("blob_limited", BinaryType, true), - StructField("date", DateType, true), - StructField("datetime", TimestampType, true) - )) + val expectedFeatureSchema = StructType( + Seq( + StructField("id", IntegerType, true), + StructField("geometry", GeometryUDT, true), + StructField("text", StringType, true), + StructField("real", DoubleType, true), + StructField("boolean", BooleanType, true), + StructField("blob", BinaryType, true), + StructField("integer", IntegerType, true), + StructField("text_limited", StringType, true), + StructField("blob_limited", BinaryType, true), + StructField("date", DateType, true), + StructField("datetime", TimestampType, true))) describe("Reading geopackage metadata") { it("should read GeoPackage metadata") { @@ -59,28 +78,20 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers { "BIT Systems", 4519.866024037493, true, - Array( - 48, 99, 57, 54, 49, 56, 55, 54, 45, 98, 102, 100, 52, 45, - 52, 102, 52, 48, 45, 97, 49, 102, 101, 45, 55, 49, 55, 101, - 57, 100, 50, 98, 48, 55, 98, 101 - ), + Array(48, 99, 57, 54, 49, 56, 55, 54, 45, 98, 102, 100, 52, 45, 52, 102, 52, 48, 45, 97, + 49, 102, 101, 45, 55, 49, 55, 101, 57, 100, 50, 98, 48, 55, 98, 101), 3, "bcd5a36f-16dc-4385-87be-b40353848597", - Array( - 49, 50, 53, 50, 97, 99, 98, 52, 45, 57, 54, 54, 52, 45, 52, - 101, 51, 50, 45, 57, 54, 100, 101, 45, 56, 48, 54, 101, 101, - 48, 101, 101, 49, 102, 57, 48 - ), + Array(49, 50, 53, 50, 97, 99, 98, 52, 45, 57, 54, 54, 52, 45, 52, 101, 51, 50, 45, 57, 54, + 100, 101, 45, 56, 48, 54, 101, 101, 48, 101, 101, 49, 102, 57, 48), Date.valueOf("2023-09-19"), - Timestamp.valueOf("2023-09-19 13:24:15.695") - ) + Timestamp.valueOf("2023-09-19 13:24:15.695")) firstElement should contain theSameElementsAs expectedValues } it("should read GeoPackage - line1") { val df = readFeatureData("line1") - df.printSchema() df.schema shouldEqual expectedFeatureSchema @@ -94,17 +105,14 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers { "East Lockheed Drive", 1990.5159635296877, false, - Array( - 54, 97, 98, 100, 98, 51, 97, 56, 45, 54, 53, 101, 48, 45, 52, 55, 48, 54, 45, 56, 50, 52, 48, 45, 51, 57, 48, 55, 99, 50, 102, 102, 57, 48, 99, 55 - ), + Array(54, 97, 98, 100, 98, 51, 97, 56, 45, 54, 53, 101, 48, 45, 52, 55, 48, 54, 45, 56, + 50, 52, 48, 45, 51, 57, 48, 55, 99, 50, 102, 102, 57, 48, 99, 55), 1, "13dd91dc-3b7d-4d8d-a0ca-b3afb8e31c3d", - Array( - 57, 54, 98, 102, 56, 99, 101, 56, 45, 102, 48, 54, 49, 45, 52, 55, 99, 48, 45, 97, 98, 48, 101, 45, 97, 99, 50, 52, 100, 98, 50, 97, 102, 50, 50, 54 - ), + Array(57, 54, 98, 102, 56, 99, 101, 56, 45, 102, 48, 54, 49, 45, 52, 55, 99, 48, 45, 97, + 98, 48, 101, 45, 97, 99, 50, 52, 100, 98, 50, 97, 102, 50, 50, 54), Date.valueOf("2023-09-19"), - Timestamp.valueOf("2023-09-19 13:24:15.716") - ) + Timestamp.valueOf("2023-09-19 13:24:15.716")) } it("should read GeoPackage - polygon1") { @@ -113,8 +121,7 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers { df.schema shouldEqual expectedFeatureSchema df.select("geometry").collectAsList().get(0).toSeq should contain theSameElementsAs Seq( - wktReader.read(POLYGON_1) - ) + wktReader.read(POLYGON_1)) } it("should read GeoPackage - geometry1") { @@ -122,7 +129,9 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers { df.count shouldEqual 10 df.schema shouldEqual expectedFeatureSchema - df.selectExpr("ST_ASTEXT(geometry)").as[String].collect() should contain theSameElementsAs Seq( + df.selectExpr("ST_ASTEXT(geometry)") + .as[String] + .collect() should contain theSameElementsAs Seq( POINT_1, POINT_2, POINT_3, @@ -132,40 +141,86 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers { LINESTRING_3, POLYGON_1, POLYGON_2, - POLYGON_3 - ) + POLYGON_3) } - } + it("should read polygon with envelope data") { + val tables = Table( + ("tableName", "expectedCount"), + ("GB_Hex_5km_GS_CompressibleGround_v8", 4233), + ("GB_Hex_5km_GS_Landslides_v8", 4228), + ("GB_Hex_5km_GS_RunningSand_v8", 4233), + ("GB_Hex_5km_GS_ShrinkSwell_v8", 4233), + ("GB_Hex_5km_GS_SolubleRocks_v8", 4295)) + + forAll(tables) { (tableName: String, expectedCount: Int) => + val df = sparkSession.read + .format("geopackage") + .option("tableName", tableName) + .load(polygonsPath) + + df.count() shouldEqual expectedCount + } + } + } - describe("GeoPackage Test") { + describe("GeoPackage Raster Data Test") { it("should read") { val fractions = Table( ("tableName", "channelNumber", "expectedSum"), ("point1_tiles", 4, 466591.0), ("line1_tiles", 4, 5775976.0), - ("polygon1_tiles", 4, 1.1269871E7), - ("geometry1_tiles", 4, 2.6328442E7), + ("polygon1_tiles", 4, 1.1269871e7), + ("geometry1_tiles", 4, 2.6328442e7), ("point2_tiles", 4, 137456.0), ("line2_tiles", 4, 6701101.0), - ("polygon2_tiles", 4, 5.1170714E7), - ("geometry2_tiles", 4, 1.6699823E7), - ("bit_systems", 1, 6.5561879E7), - ("nga", 1, 6.8078856E7), - ("bit_systems_wgs84", 1, 7.7276934E7), - ("nga_pc", 1, 2.90590616E8), - ("bit_systems_world", 1, 7.7276934E7), - ("nga_pc_world", 1, 2.90590616E8), - ) - - forAll (fractions) { (tableName: String, channelNumber: Int, expectedSum: Double) => { - val df = readFeatureData(tableName) - val calculatedSum = df.selectExpr(s"RS_SummaryStats(tile_data, 'sum', ${channelNumber}) as stats") - .selectExpr("sum(stats)").as[Double] - - calculatedSum.collect().head shouldEqual expectedSum - }} + ("polygon2_tiles", 4, 5.1170714e7), + ("geometry2_tiles", 4, 1.6699823e7), + ("bit_systems", 1, 6.5561879e7), + ("nga", 1, 6.8078856e7), + ("bit_systems_wgs84", 1, 7.7276934e7), + ("nga_pc", 1, 2.90590616e8), + ("bit_systems_world", 1, 7.7276934e7), + ("nga_pc_world", 1, 2.90590616e8)) + + forAll(fractions) { (tableName: String, channelNumber: Int, expectedSum: Double) => + { + val df = readFeatureData(tableName) + val calculatedSum = df + .selectExpr(s"RS_SummaryStats(tile_data, 'sum', ${channelNumber}) as stats") + .selectExpr("sum(stats)") + .as[Double] + + calculatedSum.collect().head shouldEqual expectedSum + } + } + } + + it("should be able to read complex raster data") { + val df = sparkSession.read + .format("geopackage") + .option("tableName", "AuroraAirportNoise") + .load(rasterPath) + + val calculatedSum = df + .selectExpr(s"RS_SummaryStats(tile_data, 'sum', ${1}) as stats") + .selectExpr("sum(stats)") + .as[Double] + + calculatedSum.first() shouldEqual 2.027126e7 + + val df2 = sparkSession.read + .format("geopackage") + .option("tableName", "LiquorLicenseDensity") + .load(rasterPath) + + val calculatedSum2 = df2 + .selectExpr(s"RS_SummaryStats(tile_data, 'sum', ${1}) as stats") + .selectExpr("sum(stats)") + .as[Double] + + calculatedSum2.first() shouldEqual 2.882028e7 } } @@ -181,11 +236,16 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers { private val POINT_2 = "POINT (-104.802987 39.717703)" private val POINT_3 = "POINT (-104.807496 39.714085)" private val POINT_4 = "POINT (-104.79948 39.714729)" - private val LINESTRING_1 = "LINESTRING (-104.800614 39.720721, -104.802174 39.720726, -104.802584 39.72066, -104.803088 39.720477, -104.803474 39.720209)" - private val LINESTRING_2 = "LINESTRING (-104.809612 39.718379, -104.806638 39.718372, -104.806236 39.718439, -104.805939 39.718536, -104.805654 39.718677, -104.803652 39.720095)" - private val LINESTRING_3 = "LINESTRING (-104.806344 39.722425, -104.805854 39.722634, -104.805656 39.722647, -104.803749 39.722641, -104.803769 39.721849, -104.803806 39.721725, -104.804382 39.720865)" - private val POLYGON_1 = "POLYGON ((-104.802246 39.720343, -104.802246 39.719753, -104.802183 39.719754, -104.802184 39.719719, -104.802138 39.719694, -104.802097 39.719691, -104.802096 39.719648, -104.801646 39.719648, -104.801644 39.719722, -104.80155 39.719723, -104.801549 39.720207, -104.801648 39.720207, -104.801648 39.720341, -104.802246 39.720343))" - private val POLYGON_2 = "POLYGON ((-104.802259 39.719604, -104.80226 39.71955, -104.802281 39.719416, -104.802332 39.719372, -104.802081 39.71924, -104.802044 39.71929, -104.802027 39.719278, -104.802044 39.719229, -104.801785 39.719129, -104.801639 39.719413, -104.801649 39.719472, -104.801694 39.719524, -104.801753 39.71955, -104.80175 39.719606, -104.80194 39.719606, -104.801939 39.719555, -104.801977 39.719556, -104.801979 39.719606, -104.802259 39.719604), (-104.80213 39.71944, -1 [...] - private val POLYGON_3 = "POLYGON ((-104.802867 39.718122, -104.802369 39.717845, -104.802571 39.71763, -104.803066 39.717909, -104.802867 39.718122))" - + private val LINESTRING_1 = + "LINESTRING (-104.800614 39.720721, -104.802174 39.720726, -104.802584 39.72066, -104.803088 39.720477, -104.803474 39.720209)" + private val LINESTRING_2 = + "LINESTRING (-104.809612 39.718379, -104.806638 39.718372, -104.806236 39.718439, -104.805939 39.718536, -104.805654 39.718677, -104.803652 39.720095)" + private val LINESTRING_3 = + "LINESTRING (-104.806344 39.722425, -104.805854 39.722634, -104.805656 39.722647, -104.803749 39.722641, -104.803769 39.721849, -104.803806 39.721725, -104.804382 39.720865)" + private val POLYGON_1 = + "POLYGON ((-104.802246 39.720343, -104.802246 39.719753, -104.802183 39.719754, -104.802184 39.719719, -104.802138 39.719694, -104.802097 39.719691, -104.802096 39.719648, -104.801646 39.719648, -104.801644 39.719722, -104.80155 39.719723, -104.801549 39.720207, -104.801648 39.720207, -104.801648 39.720341, -104.802246 39.720343))" + private val POLYGON_2 = + "POLYGON ((-104.802259 39.719604, -104.80226 39.71955, -104.802281 39.719416, -104.802332 39.719372, -104.802081 39.71924, -104.802044 39.71929, -104.802027 39.719278, -104.802044 39.719229, -104.801785 39.719129, -104.801639 39.719413, -104.801649 39.719472, -104.801694 39.719524, -104.801753 39.71955, -104.80175 39.719606, -104.80194 39.719606, -104.801939 39.719555, -104.801977 39.719556, -104.801979 39.719606, -104.802259 39.719604), (-104.80213 39.71944, -104.802133 39.71949, -1 [...] + private val POLYGON_3 = + "POLYGON ((-104.802867 39.718122, -104.802369 39.717845, -104.802571 39.71763, -104.803066 39.717909, -104.802867 39.718122))" } diff --git a/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala b/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala index 2da12eceb..bfb9bb848 100644 --- a/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala +++ b/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/TestBaseScala.scala @@ -36,6 +36,7 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll { .master("local[*]") .appName("sedonasqlScalaTest") .config("spark.sql.warehouse.dir", warehouseLocation) + .config("spark.jars.packages", "") // We need to be explicit about broadcasting in tests. .config("sedona.join.autoBroadcastJoinThreshold", "-1") .getOrCreate()
