GitHub user michelelaporta added a comment to the discussion: Sedona LEFT JOIN using SPATIAL INDEX
Solved; here follows a working example: ``` package test; import static org.apache.spark.sql.functions.broadcast; import static org.apache.spark.sql.functions.expr; import java.util.ArrayList; import java.util.List; import org.apache.sedona.spark.SedonaContext; import org.apache.sedona.sql.SedonaSqlExtensions; import org.apache.sedona.viz.core.Serde.SedonaVizKryoRegistrator; import org.apache.sedona.viz.sql.utils.SedonaVizRegistrator; import org.apache.spark.SparkConf; import org.apache.spark.serializer.KryoSerializer; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; /** * @see https://github.com/apache/sedona/issues/247 --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-exports java.base/sun.util.calendar=ALL-UNNAMED */ public class SedonBroadcastTest { public static void main(String[] args) { try { SparkConf sparkConf = new SparkConf() .setMaster("local[" + Runtime.getRuntime().availableProcessors() + "]") .set("spark.sedona.global.index", "true") .set("spark.kryo.registrator", SedonaVizKryoRegistrator.class.getName()) .set("spark.sql.extensions", SedonaSqlExtensions.class.getName()) .set("spark.serializer", KryoSerializer.class.getName()) ; SparkSession sedona = SedonaContext.builder().config(sparkConf).getOrCreate(); SedonaVizRegistrator.registerAll(sedona); String polygonData = "POLYGON ((38.72 13.14,38.72 13.21,38.68 13.21,38.68 13.14,38.72 13.14))"; String polygonSql = "select ST_GeomFromWKT('" + polygonData + "') as geometry"; Dataset<Row> polygon =sedona.sql(polygonSql); // point inside double lat_in1 = 13.17d; double lon_in1 = 38.70d; Row point_in1 = RowFactory.create(lat_in1,lon_in1); 
// point outside double lat_out1 = 13.502197d; double lon_out1 = 38.464880d; Row point_out1 = RowFactory.create(lat_out1,lon_out1); List<Row> rowData = new ArrayList<>(); rowData.add(point_in1); rowData.add(point_out1); StructType schema = DataTypes.createStructType( new StructField[] { DataTypes.createStructField("lat", DataTypes.DoubleType, true), DataTypes.createStructField("lon", DataTypes.DoubleType, true), }); Dataset<Row> data = sedona.createDataFrame(rowData, schema); data.createOrReplaceTempView("inputSpatialDf"); String sqlQuery = "SELECT ST_GeomFromText(CONCAT('POINT('," + "lon" + ",' '," + "lat" + ",')')) as point from inputSpatialDf"; Dataset<Row> pointDs = sedona.sql(sqlQuery); // USING BROADCAST Dataset<Row> broadcastJoinDf = pointDs.alias("pointDf").join( broadcast(polygon).alias("polygonDf"), expr("ST_Within(pointDf.point, polygonDf.geometry)"), "left"); broadcastJoinDf.show(); } catch (Exception e) { e.printStackTrace(); } } } ``` The results: ``` +--------------------+--------------------+ | point| geometry| +--------------------+--------------------+ | POINT (38.7 13.17)|POLYGON ((38.72 1...| |POINT (38.46488 1...| NULL| +--------------------+--------------------+ ``` In the logs I can see that SpatialRDD has been triggered: ``` 25/12/01 12:54:14 DEBUG SedonaSqlParser:64 Parsing command: inputSpatialDf 25/12/01 12:55:28 INFO SpatialRDD:271 Collected 1361 samples ``` Am I correct now? GitHub link: https://github.com/apache/sedona/discussions/2520#discussioncomment-15124163 ---- This is an automatically sent email for [email protected]. To unsubscribe, please send an email to: [email protected]
