GitHub user michelelaporta added a comment to the discussion: Sedona LEFT JOIN 
using SPATIAL INDEX

Solved. A working example follows:

```
package test;

import static org.apache.spark.sql.functions.broadcast;
import static org.apache.spark.sql.functions.expr;

import java.util.ArrayList;
import java.util.List;

import org.apache.sedona.spark.SedonaContext;
import org.apache.sedona.sql.SedonaSqlExtensions;
import org.apache.sedona.viz.core.Serde.SedonaVizKryoRegistrator;
import org.apache.sedona.viz.sql.utils.SedonaVizRegistrator;
import org.apache.spark.SparkConf;
import org.apache.spark.serializer.KryoSerializer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
/**
 * @see https://github.com/apache/sedona/issues/247

--add-opens=java.base/java.nio=ALL-UNNAMED
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
--add-opens=java.base/java.util=ALL-UNNAMED 
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.base/sun.util.calendar=ALL-UNNAMED
 */
public class SedonBroadcastTest {

    public static void main(String[] args) {
        try {
                SparkConf sparkConf = new SparkConf()
                    .setMaster("local[" + 
Runtime.getRuntime().availableProcessors() + "]")
                    .set("spark.sedona.global.index", "true")
                    .set("spark.kryo.registrator", 
SedonaVizKryoRegistrator.class.getName())
                    .set("spark.sql.extensions", 
SedonaSqlExtensions.class.getName())
                    .set("spark.serializer", KryoSerializer.class.getName())
                    ;

            SparkSession sedona = 
SedonaContext.builder().config(sparkConf).getOrCreate();
            SedonaVizRegistrator.registerAll(sedona);
                        
            String polygonData = "POLYGON ((38.72 13.14,38.72 13.21,38.68 
13.21,38.68 13.14,38.72 13.14))";
            String polygonSql = "select ST_GeomFromWKT('" + polygonData + "') 
as geometry";
            
            Dataset<Row> polygon =sedona.sql(polygonSql);
            
            // point inside
            double lat_in1 = 13.17d;
            double lon_in1 = 38.70d;
                Row point_in1 = RowFactory.create(lat_in1,lon_in1);
                        
                        // point outside
                        double lat_out1 = 13.502197d;
            double lon_out1 = 38.464880d;
                Row point_out1 = RowFactory.create(lat_out1,lon_out1);

                List<Row> rowData = new ArrayList<>();
                        rowData.add(point_in1);
                        rowData.add(point_out1);
                
                StructType schema = DataTypes.createStructType(
                                new StructField[] {
                                        DataTypes.createStructField("lat", 
DataTypes.DoubleType, true),
                                        DataTypes.createStructField("lon", 
DataTypes.DoubleType, true),
                        });
                        Dataset<Row> data = sedona.createDataFrame(rowData, 
schema);
                        data.createOrReplaceTempView("inputSpatialDf");

                        String sqlQuery = "SELECT 
ST_GeomFromText(CONCAT('POINT('," + "lon" + ",' '," + "lat" + ",')')) as point 
from inputSpatialDf";
                        Dataset<Row> pointDs = sedona.sql(sqlQuery);
                        
                        // USING BROADCAST
                        Dataset<Row> broadcastJoinDf = 
pointDs.alias("pointDf").join(
                                broadcast(polygon).alias("polygonDf"), 
expr("ST_Within(pointDf.point, polygonDf.geometry)"), "left");
                        broadcastJoinDf.show();
                        
                
                } catch (Exception e) {
                        e.printStackTrace();
                }
    }
}

```

The results:

```
+--------------------+--------------------+
|               point|            geometry|
+--------------------+--------------------+
|  POINT (38.7 13.17)|POLYGON ((38.72 1...|
|POINT (38.46488 1...|                NULL|
+--------------------+--------------------+
```


In the logs I can see that SpatialRDD has been triggered:

```
25/12/01 12:54:14 DEBUG SedonaSqlParser:64 Parsing command: inputSpatialDf
25/12/01 12:55:28 INFO SpatialRDD:271 Collected 1361 samples

```
Is this correct now?


GitHub link: 
https://github.com/apache/sedona/discussions/2520#discussioncomment-15124163

----
This is an automatically sent email for [email protected].
To unsubscribe, please send an email to: [email protected]

Reply via email to