On 21.09.2017 02:27, Alexander Korotkov wrote:
On Thu, Sep 21, 2017 at 2:06 AM, Darafei "Komяpa" Praliaskouski
<m...@komzpa.net <mailto:m...@komzpa.net>> wrote:
It is possible for bbox->low.x to be NaN when circle->center.x
is and
circle->radius are both +Infinity.
What is rationale behind this circle?
I would prefer to rather forbid any geometries with infs and nans.
However, then upgrade process will suffer. User with such geometries
would get errors during dump/restore, pg_upgraded instances would
still contain invalid values...
It seems to me that any circle with radius of any Infinity should
become a [-Infinity .. Infinity, -Infinity .. Infinity] box.Then
you won't have NaNs, and index structure shouldn't be broken.
We probably should produce [-Infinity .. Infinity, -Infinity ..
Infinity] box for any geometry containing inf or nan. That MBR would
be founded for any query, saying: "index can't help you for this kind
value, only recheck can deal with that". Therefore, we would at least
guarantee that results of sequential scan and index scan are the same.
I have looked at the GiST KNN code and found the same errors for NaNs,
infinities and NULLs as in my SP-GiST KNN patch.
Attached two patches:
1. Fix NaN-inconsistencies in circle's bounding boxes computed in GiST
compress
method leading to the failed Assert(box->low.x <= box->high.x) in
computeDistance() from src/backend/access/gist/gistproc.c by
transforming NaNs
into infinities (corresponding test case provided in the second patch).
2. Fix ordering of NULL, NaN and infinite distances by GiST. This distance
values could be mixed because NULL distances were transformed into
infinities,
and there was no special processing for NaNs in KNN queue's comparison
function.
At first I tried just to set recheck flag for NULL distances, but it did not
work for index-only scans because they do not support rechecking. So I had
to add a special flag for NULL distances.
Should I start a separate thread for this issue and add patches to
commitfest?
--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
>From 52ab493cfe1ec6260578054b71f2c48e77d4850a Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Fri, 22 Sep 2017 02:06:48 +0300
Subject: [PATCH 1/2] Fix circle bounding box inconsistency in GiST compress
method
---
src/backend/access/gist/gistproc.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c
index 08990f5..2699304 100644
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@@ -1149,6 +1149,16 @@ gist_circle_compress(PG_FUNCTION_ARGS)
r->high.y = in->center.y + in->radius;
r->low.y = in->center.y - in->radius;
+ /* avoid box inconsistency by transforming NaNs into infinities */
+ if (isnan(r->high.x))
+ r->high.x = get_float8_infinity();
+ if (isnan(r->high.y))
+ r->high.y = get_float8_infinity();
+ if (isnan(r->low.x))
+ r->low.x = -get_float8_infinity();
+ if (isnan(r->low.y))
+ r->low.y = -get_float8_infinity();
+
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(r),
entry->rel, entry->page,
--
2.7.4
>From 066ad9104ec0e967e20e820977286f001e4055a4 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.glu...@postgrespro.ru>
Date: Thu, 21 Sep 2017 19:09:02 +0300
Subject: [PATCH 2/2] Fix GiST ordering by distance for NULLs and NaNs
---
src/backend/access/gist/gistget.c | 29 ++++++++++-------
src/backend/access/gist/gistscan.c | 36 +++++++++++++++++++--
src/include/access/gist_private.h | 13 ++++++--
src/test/regress/expected/gist.out | 64 ++++++++++++++++++++++++++++++++++++++
src/test/regress/sql/gist.sql | 60 +++++++++++++++++++++++++++++++++++
5 files changed, 184 insertions(+), 18 deletions(-)
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index 760ea0c..7fed700 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -132,7 +132,7 @@ gistindex_keytest(IndexScanDesc scan,
GISTSTATE *giststate = so->giststate;
ScanKey key = scan->keyData;
int keySize = scan->numberOfKeys;
- double *distance_p;
+ GISTDistance *distance_p;
Relation r = scan->indexRelation;
*recheck_p = false;
@@ -150,7 +150,10 @@ gistindex_keytest(IndexScanDesc scan,
if (GistPageIsLeaf(page)) /* shouldn't happen */
elog(ERROR, "invalid GiST tuple found on leaf page");
for (i = 0; i < scan->numberOfOrderBys; i++)
- so->distances[i] = -get_float8_infinity();
+ {
+ so->distances[i].value = -get_float8_infinity();
+ so->distances[i].isnull = false;
+ }
return true;
}
@@ -248,7 +251,8 @@ gistindex_keytest(IndexScanDesc scan,
if ((key->sk_flags & SK_ISNULL) || isNull)
{
/* Assume distance computes as null and sorts to the end */
- *distance_p = get_float8_infinity();
+ distance_p->value = get_float8_nan();
+ distance_p->isnull = true;
}
else
{
@@ -285,7 +289,8 @@ gistindex_keytest(IndexScanDesc scan,
ObjectIdGetDatum(key->sk_subtype),
PointerGetDatum(&recheck));
*recheck_distances_p |= recheck;
- *distance_p = DatumGetFloat8(dist);
+ distance_p->value = DatumGetFloat8(dist);
+ distance_p->isnull = false;
}
key++;
@@ -319,8 +324,8 @@ gistindex_keytest(IndexScanDesc scan,
* sibling will be processed next.
*/
static void
-gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
- TIDBitmap *tbm, int64 *ntids)
+gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
+ GISTDistance *myDistances, TIDBitmap *tbm, int64 *ntids)
{
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
GISTSTATE *giststate = so->giststate;
@@ -367,7 +372,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
/* Insert it into the queue using same distances as for this page */
memcpy(item->distances, myDistances,
- sizeof(double) * scan->numberOfOrderBys);
+ sizeof(item->distances[0]) * scan->numberOfOrderBys);
pairingheap_add(so->queue, &item->phNode);
@@ -497,7 +502,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
/* Insert it into the queue using new distance data */
memcpy(item->distances, so->distances,
- sizeof(double) * scan->numberOfOrderBys);
+ sizeof(item->distances[0]) * scan->numberOfOrderBys);
pairingheap_add(so->queue, &item->phNode);
@@ -571,8 +576,8 @@ getNextNearest(IndexScanDesc scan)
if (!scan->xs_orderbynulls[i])
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
#endif
- scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]);
- scan->xs_orderbynulls[i] = false;
+ scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i].value);
+ scan->xs_orderbynulls[i] = item->distances[i].isnull;
}
else if (so->orderByTypes[i] == FLOAT4OID)
{
@@ -582,8 +587,8 @@ getNextNearest(IndexScanDesc scan)
if (!scan->xs_orderbynulls[i])
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
#endif
- scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]);
- scan->xs_orderbynulls[i] = false;
+ scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i].value);
+ scan->xs_orderbynulls[i] = item->distances[i].isnull;
}
else
{
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index 058544e..95cb554 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -14,6 +14,8 @@
*/
#include "postgres.h"
+#include <math.h>
+
#include "access/gist_private.h"
#include "access/gistscan.h"
#include "access/relscan.h"
@@ -36,8 +38,36 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
/* Order according to distance comparison */
for (i = 0; i < scan->numberOfOrderBys; i++)
{
- if (sa->distances[i] != sb->distances[i])
- return (sa->distances[i] < sb->distances[i]) ? 1 : -1;
+ double da;
+ double db;
+
+ if (sa->distances[i].isnull)
+ {
+ if (sb->distances[i].isnull)
+ continue; /* NULL = NULL */
+
+ return -1; /* NULL > non-NULL */
+ }
+
+ if (sb->distances[i].isnull)
+ return 1; /* non-NULL < NULL */
+
+ da = sa->distances[i].value;
+ db = sb->distances[i].value;
+
+ if (isnan(da))
+ {
+ if (isnan(db))
+ continue; /* NaN = NaN */
+
+ return -1; /* NaN > non-NaN */
+ }
+
+ if (isnan(db))
+ return 1; /* non-NaN < NaN */
+
+ if (da != db)
+ return (da < db) ? 1 : -1;
}
/* Heap items go before inner pages, to ensure a depth-first search */
@@ -81,7 +111,7 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
so->queueCxt = giststate->scanCxt; /* see gistrescan */
/* workspaces with size dependent on numberOfOrderBys: */
- so->distances = palloc(sizeof(double) * scan->numberOfOrderBys);
+ so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys);
so->qual_ok = true; /* in case there are zero keys */
if (scan->numberOfOrderBys > 0)
{
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index bfef2df..2ae3162 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -125,6 +125,13 @@ typedef struct GISTSearchHeapItem
* LP_DEAD */
} GISTSearchHeapItem;
+/* Nullable distance */
+typedef struct GISTDistance
+{
+ double value;
+ bool isnull;
+} GISTDistance;
+
/* Unvisited item, either index page or heap tuple */
typedef struct GISTSearchItem
{
@@ -136,13 +143,13 @@ typedef struct GISTSearchItem
/* we must store parentlsn to detect whether a split occurred */
GISTSearchHeapItem heap; /* heap info, if heap tuple */
} data;
- double distances[FLEXIBLE_ARRAY_MEMBER]; /* numberOfOrderBys
+ GISTDistance distances[FLEXIBLE_ARRAY_MEMBER]; /* numberOfOrderBys
* entries */
} GISTSearchItem;
#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
-#define SizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(double) * (n_distances))
+#define SizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(GISTDistance) * (n_distances))
/*
* GISTScanOpaqueData: private state for a scan of a GiST index
@@ -158,7 +165,7 @@ typedef struct GISTScanOpaqueData
bool firstCall; /* true until first gistgettuple call */
/* pre-allocated workspace arrays */
- double *distances; /* output area for gistindex_keytest */
+ GISTDistance *distances; /* output area for gistindex_keytest */
/* info about killed items if any (killedItems is NULL if never used) */
OffsetNumber *killedItems; /* offset numbers of killed items */
diff --git a/src/test/regress/expected/gist.out b/src/test/regress/expected/gist.out
index 91f9998..6f165e6 100644
--- a/src/test/regress/expected/gist.out
+++ b/src/test/regress/expected/gist.out
@@ -225,3 +225,67 @@ reset enable_seqscan;
reset enable_bitmapscan;
reset enable_indexonlyscan;
drop table gist_tbl;
+-- Test ORDER BY circle <-> point with Inifinities and NaNs
+CREATE TABLE gist_circle_tbl (id int, c circle);
+INSERT INTO gist_circle_tbl
+ SELECT (x - 1) * 100 + y, circle(point(x * 10, y * 10), 1 + (x + y) % 10)
+ FROM generate_series(1, 100) x,
+ generate_series(1, 100) y;
+INSERT INTO gist_circle_tbl
+ SELECT i, '<(200, 300), 5>'
+ FROM generate_series(10001, 11000) AS i;
+INSERT INTO gist_circle_tbl
+ VALUES
+ (11001, NULL),
+ (11002, NULL),
+ (11003, '<(0,100), infinity>'),
+ (11004, '<(-infinity,0),1000>'),
+ (11005, '<(infinity,-infinity),infinity>');
+CREATE INDEX gist_circle_tbl_idx ON gist_circle_tbl USING gist(c);
+-- get ordering results from seq scan
+SET enable_seqscan TO on;
+SET enable_indexscan TO off;
+EXPLAIN (COSTS OFF)
+SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+FROM gist_circle_tbl;
+ QUERY PLAN
+------------------------------------------------
+ WindowAgg
+ -> Sort
+ Sort Key: ((c <-> '(123,456)'::point))
+ -> Seq Scan on gist_circle_tbl
+(4 rows)
+
+CREATE TABLE gist_circle_tbl_ord_seq AS
+ SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+ FROM gist_circle_tbl;
+-- get ordering results from index scan
+SET enable_seqscan TO off;
+SET enable_indexscan TO on;
+EXPLAIN (COSTS OFF)
+SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+FROM gist_circle_tbl;
+ QUERY PLAN
+---------------------------------------------------------------
+ WindowAgg
+ -> Index Scan using gist_circle_tbl_idx on gist_circle_tbl
+ Order By: (c <-> '(123,456)'::point)
+(3 rows)
+
+CREATE TABLE gist_circle_tbl_ord_idx AS
+ SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+ FROM gist_circle_tbl;
+-- compare results
+SELECT *
+FROM gist_circle_tbl_ord_seq seq FULL JOIN gist_circle_tbl_ord_idx idx USING (n, id)
+WHERE seq.id IS NULL OR idx.id IS NULL;
+ n | id | dist | dist
+---+----+------+------
+(0 rows)
+
+-- clean up
+RESET enable_seqscan;
+RESET enable_indexscan;
+DROP TABLE gist_circle_tbl;
+DROP TABLE gist_circle_tbl_ord_seq;
+DROP TABLE gist_circle_tbl_ord_idx;
diff --git a/src/test/regress/sql/gist.sql b/src/test/regress/sql/gist.sql
index 49126fd..a603e72 100644
--- a/src/test/regress/sql/gist.sql
+++ b/src/test/regress/sql/gist.sql
@@ -120,3 +120,63 @@ reset enable_bitmapscan;
reset enable_indexonlyscan;
drop table gist_tbl;
+
+
+-- Test ORDER BY circle <-> point with Inifinities and NaNs
+CREATE TABLE gist_circle_tbl (id int, c circle);
+
+INSERT INTO gist_circle_tbl
+ SELECT (x - 1) * 100 + y, circle(point(x * 10, y * 10), 1 + (x + y) % 10)
+ FROM generate_series(1, 100) x,
+ generate_series(1, 100) y;
+
+INSERT INTO gist_circle_tbl
+ SELECT i, '<(200, 300), 5>'
+ FROM generate_series(10001, 11000) AS i;
+
+INSERT INTO gist_circle_tbl
+ VALUES
+ (11001, NULL),
+ (11002, NULL),
+ (11003, '<(0,100), infinity>'),
+ (11004, '<(-infinity,0),1000>'),
+ (11005, '<(infinity,-infinity),infinity>');
+
+CREATE INDEX gist_circle_tbl_idx ON gist_circle_tbl USING gist(c);
+
+-- get ordering results from seq scan
+SET enable_seqscan TO on;
+SET enable_indexscan TO off;
+
+EXPLAIN (COSTS OFF)
+SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+FROM gist_circle_tbl;
+
+CREATE TABLE gist_circle_tbl_ord_seq AS
+ SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+ FROM gist_circle_tbl;
+
+-- get ordering results from index scan
+SET enable_seqscan TO off;
+SET enable_indexscan TO on;
+
+EXPLAIN (COSTS OFF)
+SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+FROM gist_circle_tbl;
+
+CREATE TABLE gist_circle_tbl_ord_idx AS
+ SELECT rank() OVER (ORDER BY c <-> point '123,456') n, c <-> point '123,456' dist, id
+ FROM gist_circle_tbl;
+
+-- compare results
+SELECT *
+FROM gist_circle_tbl_ord_seq seq FULL JOIN gist_circle_tbl_ord_idx idx USING (n, id)
+WHERE seq.id IS NULL OR idx.id IS NULL;
+
+-- clean up
+RESET enable_seqscan;
+RESET enable_indexscan;
+
+DROP TABLE gist_circle_tbl;
+DROP TABLE gist_circle_tbl_ord_seq;
+DROP TABLE gist_circle_tbl_ord_idx;
--
2.7.4
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers