This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 5fe2f09 update documentation for main branch
5fe2f09 is described below
commit 5fe2f096ff3419c23eaaa144f49743a51e2eb811
Author: GitHub Actions <[email protected]>
AuthorDate: Fri Sep 19 21:05:20 2025 +0000
update documentation for main branch
---
404.html | 113 +++--
.../index.html | 468 ++++++++++++++-------
image/datasets-continents.png | Bin 0 -> 101103 bytes
image/datasets-schema.png | Bin 0 -> 355233 bytes
index.html | 120 ++++--
overview-methodology/index.html | 140 ++++--
search/search_index.json | 2 +-
7 files changed, 607 insertions(+), 236 deletions(-)
diff --git a/404.html b/404.html
index 11ff0f9..a92686f 100644
--- a/404.html
+++ b/404.html
@@ -187,37 +187,19 @@
- <li class="md-tabs__item">
- <a href="/." class="md-tabs__link">
-
-
-
+
+ <li class="md-tabs__item">
+ <a href="/." class="md-tabs__link">
+
- SpatialBench
-
- </a>
- </li>
+ Home
-
-
-
-
-
-
- <li class="md-tabs__item">
- <a href="/overview-methodology/" class="md-tabs__link">
-
-
-
+ </a>
+ </li>
- Overview and Methodology
-
- </a>
- </li>
-
@@ -352,6 +334,49 @@
+
+
+
+
+
+
+
+
+
+
+ <li class="md-nav__item md-nav__item--nested">
+
+
+
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" >
+
+
+ <label class="md-nav__link" for="__nav_1" id="__nav_1_label"
tabindex="0">
+
+
+
+ <span class="md-ellipsis">
+ Home
+
+ </span>
+
+
+ <span class="md-nav__icon md-icon"></span>
+ </label>
+
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label"
aria-expanded="false">
+ <label class="md-nav__title" for="__nav_1">
+ <span class="md-nav__icon md-icon"></span>
+ Home
+ </label>
+ <ul class="md-nav__list" data-md-scrollfix>
+
+
+
+
+
+
+
<li class="md-nav__item">
<a href="/." class="md-nav__link">
@@ -367,9 +392,10 @@
</li>
-
-
-
+
+
+
+
@@ -389,6 +415,37 @@
</li>
+
+
+
+
+
+
+
+
+ <li class="md-nav__item">
+ <a href="/datasets-generators/" class="md-nav__link">
+
+
+
+ <span class="md-ellipsis">
+ Datasets and Generators
+
+ </span>
+
+
+ </a>
+ </li>
+
+
+
+
+ </ul>
+ </nav>
+
+ </li>
+
+
diff --git a/overview-methodology/index.html b/datasets-generators/index.html
similarity index 70%
copy from overview-methodology/index.html
copy to datasets-generators/index.html
index e1d8d02..a433033 100644
--- a/overview-methodology/index.html
+++ b/datasets-generators/index.html
@@ -8,7 +8,7 @@
- <link rel="prev" href="..">
+ <link rel="prev" href="../overview-methodology/">
@@ -17,7 +17,7 @@
- <title>SpatialBench Overview and Methodology - Apache Sedona
SpatialBench</title>
+ <title>SpatialBench Datasets and Generators - Apache Sedona
SpatialBench</title>
@@ -82,7 +82,7 @@
<div data-md-component="skip">
- <a href="#spatialbench-overview-and-methodology" class="md-skip">
+ <a href="#spatialbench-datasets-and-generators" class="md-skip">
Skip to content
</a>
@@ -117,7 +117,7 @@
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
- SpatialBench Overview and Methodology
+ SpatialBench Datasets and Generators
</span>
</div>
@@ -201,39 +201,21 @@
-
- <li class="md-tabs__item">
- <a href=".." class="md-tabs__link">
-
-
-
- SpatialBench
-
- </a>
- </li>
-
-
-
-
-
-
-
-
- <li class="md-tabs__item md-tabs__item--active">
- <a href="./" class="md-tabs__link">
-
-
-
+ <li class="md-tabs__item md-tabs__item--active">
+ <a href=".." class="md-tabs__link">
+
+
- Overview and Methodology
+ Home
- </a>
- </li>
+ </a>
+ </li>
+
@@ -367,6 +349,54 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class="md-nav__item md-nav__item--active md-nav__item--section
md-nav__item--nested">
+
+
+
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1"
checked>
+
+
+ <label class="md-nav__link" for="__nav_1" id="__nav_1_label"
tabindex="">
+
+
+
+ <span class="md-ellipsis">
+ Home
+
+ </span>
+
+
+ <span class="md-nav__icon md-icon"></span>
+ </label>
+
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label"
aria-expanded="true">
+ <label class="md-nav__title" for="__nav_1">
+ <span class="md-nav__icon md-icon"></span>
+ Home
+ </label>
+ <ul class="md-nav__list" data-md-scrollfix>
+
+
+
+
+
<li class="md-nav__item">
@@ -384,9 +414,33 @@
</li>
+
+
+
+
+
+
+
+
+ <li class="md-nav__item">
+ <a href="../overview-methodology/" class="md-nav__link">
+
+
+
+ <span class="md-ellipsis">
+ Overview and Methodology
-
-
+ </span>
+
+
+ </a>
+ </li>
+
+
+
+
+
+
@@ -406,7 +460,7 @@
<span class="md-ellipsis">
- Overview and Methodology
+ Datasets and Generators
</span>
@@ -419,7 +473,7 @@
<span class="md-ellipsis">
- Overview and Methodology
+ Datasets and Generators
</span>
@@ -442,63 +496,51 @@
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
- <a href="#why-spatialbench" class="md-nav__link">
+ <a href="#data-model" class="md-nav__link">
<span class="md-ellipsis">
- Why SpatialBench?
+ Data model
</span>
</a>
-</li>
-
- <li class="md-nav__item">
- <a href="#hardware-and-software" class="md-nav__link">
- <span class="md-ellipsis">
- Hardware and software
- </span>
- </a>
-
-</li>
-
- <li class="md-nav__item">
- <a href="#accurately-comparing-different-engines" class="md-nav__link">
+ <nav class="md-nav" aria-label="Data model">
+ <ul class="md-nav__list">
+
+ <li class="md-nav__item">
+ <a href="#zone-table-scaling" class="md-nav__link">
<span class="md-ellipsis">
- Accurately comparing different engines
+ Zone Table Scaling
</span>
</a>
</li>
-
- <li class="md-nav__item">
- <a href="#engine-tuning-in-benchmarks" class="md-nav__link">
+
+ <li class="md-nav__item">
+ <a href="#geographic-coverage" class="md-nav__link">
<span class="md-ellipsis">
- Engine tuning in benchmarks
+ Geographic Coverage
</span>
</a>
</li>
-
- <li class="md-nav__item">
- <a href="#open-source-benchmarks-vs-vendor-benchmarks" class="md-nav__link">
+
+ <li class="md-nav__item">
+ <a href="#distribution-options" class="md-nav__link">
<span class="md-ellipsis">
- Open source benchmarks vs. vendor benchmarks
+ Distribution Options
</span>
</a>
</li>
-
- <li class="md-nav__item">
- <a href="#how-to-contribute" class="md-nav__link">
- <span class="md-ellipsis">
- How to contribute
- </span>
- </a>
+
+ </ul>
+ </nav>
</li>
<li class="md-nav__item">
- <a href="#future-work" class="md-nav__link">
+ <a href="#data-generators" class="md-nav__link">
<span class="md-ellipsis">
- Future work
+ Data generators
</span>
</a>
@@ -511,6 +553,14 @@
</li>
+
+
+ </ul>
+ </nav>
+
+ </li>
+
+
@@ -627,63 +677,51 @@
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
- <a href="#why-spatialbench" class="md-nav__link">
- <span class="md-ellipsis">
- Why SpatialBench?
- </span>
- </a>
-
-</li>
-
- <li class="md-nav__item">
- <a href="#hardware-and-software" class="md-nav__link">
+ <a href="#data-model" class="md-nav__link">
<span class="md-ellipsis">
- Hardware and software
+ Data model
</span>
</a>
-</li>
-
- <li class="md-nav__item">
- <a href="#accurately-comparing-different-engines" class="md-nav__link">
+ <nav class="md-nav" aria-label="Data model">
+ <ul class="md-nav__list">
+
+ <li class="md-nav__item">
+ <a href="#zone-table-scaling" class="md-nav__link">
<span class="md-ellipsis">
- Accurately comparing different engines
+ Zone Table Scaling
</span>
</a>
</li>
-
- <li class="md-nav__item">
- <a href="#engine-tuning-in-benchmarks" class="md-nav__link">
+
+ <li class="md-nav__item">
+ <a href="#geographic-coverage" class="md-nav__link">
<span class="md-ellipsis">
- Engine tuning in benchmarks
+ Geographic Coverage
</span>
</a>
</li>
-
- <li class="md-nav__item">
- <a href="#open-source-benchmarks-vs-vendor-benchmarks" class="md-nav__link">
+
+ <li class="md-nav__item">
+ <a href="#distribution-options" class="md-nav__link">
<span class="md-ellipsis">
- Open source benchmarks vs. vendor benchmarks
+ Distribution Options
</span>
</a>
</li>
-
- <li class="md-nav__item">
- <a href="#how-to-contribute" class="md-nav__link">
- <span class="md-ellipsis">
- How to contribute
- </span>
- </a>
+
+ </ul>
+ </nav>
</li>
<li class="md-nav__item">
- <a href="#future-work" class="md-nav__link">
+ <a href="#data-generators" class="md-nav__link">
<span class="md-ellipsis">
- Future work
+ Data generators
</span>
</a>
@@ -705,7 +743,7 @@
- <a
href="https://github.com/apache/sedona-spatialbench/blob/main/docs/overview-methodology.md"
title="Edit this page" class="md-content__button md-icon" rel="edit">
+ <a
href="https://github.com/apache/sedona-spatialbench/blob/main/docs/datasets-generators.md"
title="Edit this page" class="md-content__button md-icon" rel="edit">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10
20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4zm10.2-7c.1 0
.3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1
23H12v-2.1l6.1-6.1z"></path></svg>
</a>
@@ -728,56 +766,190 @@
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
---><h1 id="spatialbench-overview-and-methodology">SpatialBench Overview and
Methodology<a class="headerlink" href="#spatialbench-overview-and-methodology"
title="Permanent link">¶</a></h1>
-<p>SpatialBench is an open benchmark suite of representative spatial queries
designed to evaluate the performance of different engines at multiple scale
factors.</p>
-<p>The SpatialBench queries are a great way to compare the relative
performance between engines for analytical spatial workloads. You can use a
small scale factor for single-machine queries, and a large scale factor to
benchmark an engine that distributes computations in the cloud.</p>
-<p>Let’s take a deeper look at why SpatialBench is so essential.</p>
-<h2 id="why-spatialbench">Why SpatialBench?<a class="headerlink"
href="#why-spatialbench" title="Permanent link">¶</a></h2>
-<p>Spatial workflows encompass queries such as spatial joins, spatial
filtering, and spatial-specific operations, including KNN joins.</p>
-<p>General-purpose analytics query benchmarks don’t cover spatial queries.
They focus on analytical queries, such as joins and aggregations, on tabular
data. Here are some popular analytical benchmarks:</p>
-<ul>
-<li><a href="https://www.tpc.org/tpch/">TPC-H</a></li>
-<li><a href="https://www.tpc.org/tpcds/">TPC-DS</a></li>
-<li><a href="https://benchmark.clickhouse.com/">ClickBench</a></li>
-<li><a href="https://github.com/brianfrankcooper/YCSB">YCSB</a></li>
-<li><a href="https://duckdblabs.github.io/db-benchmark/">db-benchmark</a></li>
-</ul>
-<p>The analytical benchmarks help analyze analytical performance, but that
doesn’t necessarily translate to spatial queries. An engine can be blazing
fast for a large tabular aggregation and terrible for spatial joins.</p>
-<p>SpatialBench is tailored for spatial queries. It’s the best modern option
to assess the spatial performance of an engine. Let’s take a look at some of
the older spatial benchmarks.</p>
-<h2 id="hardware-and-software">Hardware and software<a class="headerlink"
href="#hardware-and-software" title="Permanent link">¶</a></h2>
-<p>SpatialBench runs benchmarks on commodity hardware, with software versions
fully disclosed for each release.</p>
-<p>When comparing different runtimes, developers should make a good-faith
effort to use similar hardware and software versions. It’s not helpful to
compare one runtime with another runtime that has a lot less computational
power.</p>
-<p>SpatialBench benchmarks should always be presented with associated
hardware/software specifications so readers can assess the reliability of the
comparison.</p>
-<h2 id="accurately-comparing-different-engines">Accurately comparing different
engines<a class="headerlink" href="#accurately-comparing-different-engines"
title="Permanent link">¶</a></h2>
-<p>It is challenging to compare fundamentally different engines, such as
PostGIS (an OLTP database), DuckDB (an OLAP database), and GeoPandas (a Python
engine).</p>
-<p>For example, let’s compare how two engines execute a query differently:</p>
+--><h1 id="spatialbench-datasets-and-generators">SpatialBench Datasets and
Generators<a class="headerlink" href="#spatialbench-datasets-and-generators"
title="Permanent link">¶</a></h1>
+<p>This page describes the SpatialBench datasets and shows you how to use the
generators to create the spatial tables.</p>
+<p>SpatialBench is a geospatial benchmark designed for evaluating and
optimizing spatial query performance in data systems. Inspired by the Star
Schema Benchmark (SSB) and the New York City Taxi and Limousine Commission (NYC
TLC) dataset, SpatialBench blends realistic urban mobility scenarios with
standardized benchmarking practices.</p>
+<p>The benchmark adopts the familiar star schema structure from SSB, augmented
with spatial attributes such as pickup and dropoff points, spatial polygon
boundaries for zones, and building footprints. These spatial enhancements allow
SpatialBench to effectively test geospatial operations, including spatial
joins, distance-based queries, spatial aggregations, and point-in-polygon
analyses.</p>
+<p>By combining the systematic approach of SSB with authentic, real-world
scenarios drawn from NYC TLC data, SpatialBench provides meaningful and
practical benchmarks relevant to urban mobility and spatial analytics
workloads.</p>
+<h2 id="data-model">Data model<a class="headerlink" href="#data-model"
title="Permanent link">¶</a></h2>
+<p>SpatialBench tables:</p>
<ul>
-<li>PostGIS: create tables, load data into the tables, build an index (can be
expensive), run the query</li>
-<li>GeoPandas: read data into memory and run a query</li>
+<li><strong>Trip (Fact Table)</strong>: Records individual trips, including
spatial attributes (pickup and dropoff points), trip fare, distance, duration,
and timestamps for pickup and dropoff.</li>
+<li><strong>Customer</strong>: Represents customers who book trips.</li>
+<li><strong>Driver</strong>: Represents drivers who fulfill trips.</li>
+<li><strong>Vehicle</strong>: Details about vehicles used for trips.</li>
+<li><strong>Zone</strong>: Polygon boundaries representing city areas or
zones.</li>
+<li><strong>Building</strong>: Polygon footprints representing building
locations, types, and names.</li>
</ul>
-<p>PostGIS and GeoPandas execute queries differently, so you need to present
the query runtime with caution. For example, you can’t just ignore the time it
takes to build the PostGIS index because that can be the slowest part of the
query. That’s a critical detail for users running ad hoc queries.</p>
-<p>The SpatialBench results strive to present runtimes for all relevant
portions of the query so users are best informed about how to interpret the
results.</p>
-<h2 id="engine-tuning-in-benchmarks">Engine tuning in benchmarks<a
class="headerlink" href="#engine-tuning-in-benchmarks" title="Permanent
link">¶</a></h2>
-<p>Engines can be tuned by configuring settings or optimizing code. For
example, you can optimize Spark code by tuning the JVM. You can optimize
GeoPandas code by adding indexes. Benchmarks that tune one engine and don’t
tune any of the other engines aren’t reliable.</p>
-<p>All performance tuning is fully disclosed in the SpatialBench results.
Some results are presented both naively and fully tuned to give a better
picture of out-of-the-box performance and what’s possible for expert users.</p>
-<h2 id="open-source-benchmarks-vs-vendor-benchmarks">Open source benchmarks
vs. vendor benchmarks<a class="headerlink"
href="#open-source-benchmarks-vs-vendor-benchmarks" title="Permanent
link">¶</a></h2>
-<p>The SpatialBench benchmarks report results for some open source spatial
engines/databases.</p>
-<p>The SpatialBench repository does not report results for any proprietary
engines or vendor runtimes. Vendors are free to use the SpatialBench data
generators and run the benchmarks on their own. We ask vendors to credit
SpatialBench when they run the benchmarks and fully disclose the results so
that other practitioners can reproduce the results.</p>
-<h2 id="how-to-contribute">How to contribute<a class="headerlink"
href="#how-to-contribute" title="Permanent link">¶</a></h2>
-<p>There are a variety of ways to contribute to the SpatialBench project:</p>
+<table>
+<thead>
+<tr>
+<th><strong>Table</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Abbr.</strong></th>
+<th><strong>Primary Role</strong></th>
+<th><strong>Spatial Attributes</strong></th>
+<th><strong>Size per Scale Factor (SF)</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Building</td>
+<td>Dimension</td>
+<td>b_</td>
+<td>Polygon footprints representing building locations</td>
+<td>Polygon footprints</td>
+<td>20K × (1 + log₂(SF))</td>
+</tr>
+<tr>
+<td>Customer</td>
+<td>Dimension</td>
+<td>c_</td>
+<td>Represents customers</td>
+<td>None</td>
+<td>30K × SF</td>
+</tr>
+<tr>
+<td>Driver</td>
+<td>Dimension</td>
+<td>s_</td>
+<td>Represents drivers</td>
+<td>None</td>
+<td>500 × SF</td>
+</tr>
+<tr>
+<td>Trip</td>
+<td>Fact Table</td>
+<td>t_</td>
+<td>Records individual trips</td>
+<td>Pickup/Dropoff Points (location)</td>
+<td>6M × SF</td>
+</tr>
+<tr>
+<td>Vehicle</td>
+<td>Dimension</td>
+<td>v_</td>
+<td>Details about vehicles</td>
+<td>None</td>
+<td>100 × SF</td>
+</tr>
+<tr>
+<td>Zone</td>
+<td>Dimension</td>
+<td>z_</td>
+<td>Polygon boundaries for city zones</td>
+<td>Polygon boundaries</td>
+<td>Tiered by SF range (see below)</td>
+</tr>
+</tbody>
+</table>
+<h3 id="zone-table-scaling">Zone Table Scaling<a class="headerlink"
href="#zone-table-scaling" title="Permanent link">¶</a></h3>
+<table>
+<thead>
+<tr>
+<th><strong>Scale Factor (SF)</strong></th>
+<th><strong>Zone Subtypes Included</strong></th>
+<th><strong>Zone Cardinality</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>[0, 10)</td>
+<td>microhood, macrohood, county</td>
+<td>156,095</td>
+</tr>
+<tr>
+<td>[10, 100)</td>
+<td>+ neighborhood</td>
+<td>455,711</td>
+</tr>
+<tr>
+<td>[100, 1000)</td>
+<td>+ localadmin, locality, region, dependency</td>
+<td>1,035,371</td>
+</tr>
+<tr>
+<td>[1000, ∞)</td>
+<td>+ country</td>
+<td>1,035,749</td>
+</tr>
+</tbody>
+</table>
+<p><a class="glightbox" data-type="image" data-width="100%" data-height="auto"
href="../image/datasets-schema.png" data-desc-position="bottom"><img
alt="schema" src="../image/datasets-schema.png"></a></p>
+<h3 id="geographic-coverage">Geographic Coverage<a
class="headerlink" href="#geographic-coverage" title="Permanent link">¶</a></h3>
+<p>SpatialBench's data generator uses continent-bounded affines. Each
continent is defined by a bounding polygon, ensuring generation mostly covers
land areas and introducing the natural skew of real geographies.</p>
+<p>Bounding polygons:</p>
+<table>
+<thead>
+<tr>
+<th>Region</th>
+<th>Bounding Polygon</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Africa</td>
+<td><code>POLYGON ((-20.062752 -40.044425, 64.131567 -40.044425, 64.131567
37.579421, -20.062752 37.579421, -20.062752 -40.044425))</code></td>
+</tr>
+<tr>
+<td>Europe</td>
+<td><code>POLYGON ((-11.964479 37.926872, 64.144374 37.926872, 64.144374
71.82884, -11.964479 71.82884, -11.964479 37.926872))</code></td>
+</tr>
+<tr>
+<td>South Asia</td>
+<td><code>POLYGON ((64.58354 -9.709049, 145.526096 -9.709049, 145.526096
51.672557, 64.58354 51.672557, 64.58354 -9.709049))</code></td>
+</tr>
+<tr>
+<td>North Asia</td>
+<td><code>POLYGON ((64.495655 51.944267, 178.834704 51.944267, 178.834704
77.897255, 64.495655 77.897255, 64.495655 51.944267))</code></td>
+</tr>
+<tr>
+<td>Oceania</td>
+<td><code>POLYGON ((112.481901 -48.980212, 180.768942 -48.980212, 180.768942
-10.228433, 112.481901 -10.228433, 112.481901 -48.980212))</code></td>
+</tr>
+<tr>
+<td>South America</td>
+<td><code>POLYGON ((-83.833822 -56.170016, -33.904338 -56.170016, -33.904338
12.211188, -83.833822 12.211188, -83.833822 -56.170016))</code></td>
+</tr>
+<tr>
+<td>South North America</td>
+<td><code>POLYGON ((-124.890724 12.382931, -69.511192 12.382931, -69.511192
42.55308, -124.890724 42.55308, -124.890724 12.382931))</code></td>
+</tr>
+<tr>
+<td>North North America</td>
+<td><code>POLYGON ((-166.478008 42.681087, -52.053245 42.681087, -52.053245
72.659041, -166.478008 72.659041, -166.478008 42.681087))</code></td>
+</tr>
+</tbody>
+</table>
+<p><a class="glightbox" data-type="image" data-width="100%" data-height="auto"
href="../image/datasets-continents.png" data-desc-position="bottom"><img
alt="continents" src="../image/datasets-continents.png"></a></p>
+<h3 id="distribution-options">Distribution Options<a class="headerlink"
href="#distribution-options" title="Permanent link">¶</a></h3>
+<p>By default, SpatialBench generates points using continent-bounded affines
with a Hierarchical Thomas distribution for the trip and building tables. </p>
+<p>For more realism, you can choose from a variety of spatial distributions
when generating tables:</p>
<ul>
-<li>Submit <a href="https://github.com/apache/sedona-spatialbench/pulls">pull
requests</a> to add features</li>
-<li>Create <a
href="https://github.com/apache/sedona-spatialbench/issues">issues</a> for bug
reports</li>
-<li>Reproduce results or help add new spatial engines</li>
-<li>Publish vendor benchmarks</li>
+<li>Uniform: Evenly spread points in the unit square. </li>
+<li>Normal: Gaussian spread around a mean with configurable variance. </li>
+<li>Diagonal: Points concentrated along the y=x diagonal with configurable
buffer. </li>
+<li>Bit: Recursive grid-like pattern controlled by probability and bit depth.
</li>
+<li>Sierpinski: Self-similar fractal pattern for highly skewed coverage. </li>
+<li>Thomas: Clustered distribution with realistic hotspots and heavy-tailed
skew. </li>
+<li>Hierarchical Thomas: Multi-level clustering (cities → neighborhoods →
points), useful for mimicking urban settlement patterns.</li>
</ul>
-<p>Here is how you can communicate with the team:</p>
+<p>These options let you tailor the spatial skew to your benchmarking needs.
</p>
+<p>See the distributions page to learn more about the supported spatial
distributions, the parameters that control them, and how they impact the
data.</p>
+<h2 id="data-generators">Data generators<a class="headerlink"
href="#data-generators" title="Permanent link">¶</a></h2>
+<p>You can generate the tables for Scale Factor 1 with the following
command:</p>
+<div class="highlight"><pre><span></span><code>spatialbench-cli -s 1
--format=parquet --output-dir sf1-parquet
+</code></pre></div>
+<p>Here are the contents of the <code>sf1-parquet</code> directory:</p>
<ul>
-<li>Chat with us on the <a href="https://discord.gg/9A3k5dEBsY">Apache Sedona
Discord</a></li>
-<li>Create <a href="https://github.com/apache/sedona/discussions">GitHub
Discussions</a></li>
+<li><code>building.parquet</code></li>
+<li><code>customer.parquet</code></li>
+<li><code>driver.parquet</code></li>
+<li><code>trip.parquet</code></li>
+<li><code>vehicle.parquet</code></li>
+<li><code>zone.parquet</code></li>
</ul>
-<h2 id="future-work">Future work<a class="headerlink" href="#future-work"
title="Permanent link">¶</a></h2>
-<p>In the next release, we will add raster datasets and raster queries. These
will stress test an engine’s ability to analyze raster data. They will also
show performance when joining vector and raster datasets.</p>
+<p>See <a href="https://github.com/apache/sedona-spatialbench">the README</a>
for a full description of how to use the SpatialBench data generators.</p>
@@ -798,7 +970,7 @@
<span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21
13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6
0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4
2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10
9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1
7.5 7.1 7.9l-.1.2z"></path></svg>
</span>
- <span class="git-revision-date-localized-plugin
git-revision-date-localized-plugin-datetime" title="September 19, 2025 20:43:27
UTC">September 19, 2025 20:43:27</span>
+ <span class="git-revision-date-localized-plugin
git-revision-date-localized-plugin-datetime" title="September 19, 2025 21:04:40
UTC">September 19, 2025 21:04:40</span>
</span>
@@ -834,7 +1006,7 @@
<nav class="md-footer__inner md-grid" aria-label="Footer">
- <a href=".." class="md-footer__link md-footer__link--prev"
aria-label="Previous: SpatialBench">
+ <a href="../overview-methodology/" class="md-footer__link
md-footer__link--prev" aria-label="Previous: Overview and Methodology">
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24
24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8
11z"></path></svg>
@@ -844,7 +1016,7 @@
Previous
</span>
<div class="md-ellipsis">
- SpatialBench
+ Overview and Methodology
</div>
</div>
</a>
diff --git a/image/datasets-continents.png b/image/datasets-continents.png
new file mode 100644
index 0000000..b31f05a
Binary files /dev/null and b/image/datasets-continents.png differ
diff --git a/image/datasets-schema.png b/image/datasets-schema.png
new file mode 100644
index 0000000..06153fe
Binary files /dev/null and b/image/datasets-schema.png differ
diff --git a/index.html b/index.html
index 8a11bae..d29c83f 100644
--- a/index.html
+++ b/index.html
@@ -204,37 +204,19 @@
- <li class="md-tabs__item md-tabs__item--active">
- <a href="." class="md-tabs__link">
-
-
-
+
+ <li class="md-tabs__item md-tabs__item--active">
+ <a href="." class="md-tabs__link">
+
- SpatialBench
-
- </a>
- </li>
+ Home
-
-
-
-
-
-
- <li class="md-tabs__item">
- <a href="overview-methodology/" class="md-tabs__link">
-
-
-
+ </a>
+ </li>
- Overview and Methodology
-
- </a>
- </li>
-
@@ -371,6 +353,54 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class="md-nav__item md-nav__item--active md-nav__item--section
md-nav__item--nested">
+
+
+
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1"
checked>
+
+
+ <label class="md-nav__link" for="__nav_1" id="__nav_1_label"
tabindex="">
+
+
+
+ <span class="md-ellipsis">
+ Home
+
+ </span>
+
+
+ <span class="md-nav__icon md-icon"></span>
+ </label>
+
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label"
aria-expanded="true">
+ <label class="md-nav__title" for="__nav_1">
+ <span class="md-nav__icon md-icon"></span>
+ Home
+ </label>
+ <ul class="md-nav__list" data-md-scrollfix>
+
+
+
+
+
+
+
+
+
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
@@ -467,9 +497,10 @@
</li>
-
-
-
+
+
+
+
@@ -489,6 +520,37 @@
</li>
+
+
+
+
+
+
+
+
+ <li class="md-nav__item">
+ <a href="datasets-generators/" class="md-nav__link">
+
+
+
+ <span class="md-ellipsis">
+ Datasets and Generators
+
+ </span>
+
+
+ </a>
+ </li>
+
+
+
+
+ </ul>
+ </nav>
+
+ </li>
+
+
@@ -754,7 +816,7 @@
<span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21
13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6
0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4
2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10
9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1
7.5 7.1 7.9l-.1.2z"></path></svg>
</span>
- <span class="git-revision-date-localized-plugin
git-revision-date-localized-plugin-datetime" title="September 19, 2025 20:43:27
UTC">September 19, 2025 20:43:27</span>
+ <span class="git-revision-date-localized-plugin
git-revision-date-localized-plugin-datetime" title="September 19, 2025 21:04:40
UTC">September 19, 2025 21:04:40</span>
</span>
diff --git a/overview-methodology/index.html b/overview-methodology/index.html
index e1d8d02..955f20b 100644
--- a/overview-methodology/index.html
+++ b/overview-methodology/index.html
@@ -11,6 +11,8 @@
<link rel="prev" href="..">
+ <link rel="next" href="../datasets-generators/">
+
<link rel="icon" href="../image/sedona_logo_symbol.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.20">
@@ -201,39 +203,21 @@
-
- <li class="md-tabs__item">
- <a href=".." class="md-tabs__link">
-
-
-
- SpatialBench
-
- </a>
- </li>
-
-
-
-
-
-
-
-
- <li class="md-tabs__item md-tabs__item--active">
- <a href="./" class="md-tabs__link">
-
-
-
+ <li class="md-tabs__item md-tabs__item--active">
+ <a href=".." class="md-tabs__link">
+
- Overview and Methodology
+
+ Home
- </a>
- </li>
+ </a>
+ </li>
+
@@ -367,6 +351,54 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <li class="md-nav__item md-nav__item--active md-nav__item--section
md-nav__item--nested">
+
+
+
+ <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1"
checked>
+
+
+ <label class="md-nav__link" for="__nav_1" id="__nav_1_label"
tabindex="">
+
+
+
+ <span class="md-ellipsis">
+ Home
+
+ </span>
+
+
+ <span class="md-nav__icon md-icon"></span>
+ </label>
+
+ <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label"
aria-expanded="true">
+ <label class="md-nav__title" for="__nav_1">
+ <span class="md-nav__icon md-icon"></span>
+ Home
+ </label>
+ <ul class="md-nav__list" data-md-scrollfix>
+
+
+
+
+
<li class="md-nav__item">
@@ -384,9 +416,10 @@
</li>
-
-
-
+
+
+
+
@@ -511,6 +544,37 @@
</li>
+
+
+
+
+
+
+
+
+ <li class="md-nav__item">
+ <a href="../datasets-generators/" class="md-nav__link">
+
+
+
+ <span class="md-ellipsis">
+ Datasets and Generators
+
+ </span>
+
+
+ </a>
+ </li>
+
+
+
+
+ </ul>
+ </nav>
+
+ </li>
+
+
@@ -798,7 +862,7 @@
<span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21
13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6
0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4
2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10
9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1
7.5 7.1 7.9l-.1.2z"></path></svg>
</span>
- <span class="git-revision-date-localized-plugin
git-revision-date-localized-plugin-datetime" title="September 19, 2025 20:43:27
UTC">September 19, 2025 20:43:27</span>
+ <span class="git-revision-date-localized-plugin
git-revision-date-localized-plugin-datetime" title="September 19, 2025 21:04:40
UTC">September 19, 2025 21:04:40</span>
</span>
@@ -850,6 +914,22 @@
</a>
+
+ <a href="../datasets-generators/" class="md-footer__link
md-footer__link--next" aria-label="Next: Datasets and Generators">
+ <div class="md-footer__title">
+ <span class="md-footer__direction">
+ Next
+ </span>
+ <div class="md-ellipsis">
+ Datasets and Generators
+ </div>
+ </div>
+ <div class="md-footer__button md-icon">
+
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24
24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16
11z"></path></svg>
+ </div>
+ </a>
+
</nav>
diff --git a/search/search_index.json b/search/search_index.json
index af40317..1895e52 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Sedona
SpatialBench","text":"<p>Sedona SpatialBench makes it easy to run spatial
benchmarks on a realistic dataset with any query engine.</p> <p>The methodology
is unbiased and the benchmarks in any environment to compare relative
performance between
runtimes.</p>"},{"location":"#why-spatialbench","title":"Why
SpatialBench","text":"<p>SpatialBench is a geospatial benchmark for [...]
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Sedona
SpatialBench","text":"<p>Sedona SpatialBench makes it easy to run spatial
benchmarks on a realistic dataset with any query engine.</p> <p>The methodology
is unbiased and the benchmarks can be run in any environment to compare relative
performance between
runtimes.</p>"},{"location":"#why-spatialbench","title":"Why
SpatialBench","text":"<p>SpatialBench is a geospatial benchmark for [...]
\ No newline at end of file