On 12/16/2014 07:48 PM, Teodor Sigaev wrote:
/*
* This struct is what we actually keep in index->rd_amcache. It includes
* static configuration information as well as the lastUsedPages cache.
*/
typedef struct SpGistCache
{
spgConfigOut config; /* filled in by opclass config method */
SpGistTypeDesc attType; /* type of input data and leaf values */
SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix
values */
SpGistTypeDesc attLabelType; /* type of node label values */
SpGistLUPCache lastUsedPages; /* local storage of last-used
info */
} SpGistCache;
Now that the input data type and leaf data type can be different, which
one is "attType"? It's the leaf data type, as the patch stands. I
renamed that to attLeafType, and went fixing all the references to it.
In most places it's just a matter of search & replace, but what about
the reconstructed datum? In freeScanStackEntry, we assume that
att[Leaf]Type is the datatype for reconstructedValue, but I believe
assume elsewhere that reconstructedValue is of the same data type as the
input. At least if the opclass supports index-only scans.
I think we'll need a separate SpGistTypeDesc for the input type. Or
perhaps a separate SpGistTypeDesc for the reconstructed value and an
optional decompress method to turn the reconstructedValue back into an
actual reconstructed input datum. Or something like that.
Attached is a patch with the kibitzing I did so far.
- Heikki
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index 56827e5..de158c3 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -201,20 +201,21 @@
<para>
There are five user-defined methods that an index operator class for
- <acronym>SP-GiST</acronym> must provide. All five follow the convention
- of accepting two <type>internal</> arguments, the first of which is a
- pointer to a C struct containing input values for the support method,
- while the second argument is a pointer to a C struct where output values
- must be placed. Four of the methods just return <type>void</>, since
- all their results appear in the output struct; but
+ <acronym>SP-GiST</acronym> must provide and one optional. All five mandatory
+ methos follow the convention of accepting two <type>internal</> arguments,
+ the first of which is a pointer to a C struct containing input values for
+ the support method, while the second argument is a pointer to a C struct
+ where output values must be placed. Four of the methods just return
+ <type>void</>, since all their results appear in the output struct; but
<function>leaf_consistent</> additionally returns a <type>boolean</> result.
The methods must not modify any fields of their input structs. In all
cases, the output struct is initialized to zeroes before calling the
- user-defined method.
+ user-defined method. Optional method <function>compress</> accepts
+ datum to be indexed and returns values which actually will be indexed.
</para>
<para>
- The five user-defined methods are:
+ The five mandatory user-defined methods are:
</para>
<variablelist>
@@ -244,6 +245,7 @@ typedef struct spgConfigOut
{
Oid prefixType; /* Data type of inner-tuple prefixes */
Oid labelType; /* Data type of inner-tuple node labels */
+ Oid leafType; /* Data type of leaf */
bool canReturnData; /* Opclass can reconstruct original data */
bool longValuesOK; /* Opclass can cope with values > 1 page */
} spgConfigOut;
@@ -264,7 +266,15 @@ typedef struct spgConfigOut
<structfield>longValuesOK</> should be set true only when the
<structfield>attType</> is of variable length and the operator
class is capable of segmenting long values by repeated suffixing
- (see <xref linkend="spgist-limits">).
+ (see <xref linkend="spgist-limits">). <structfield>leafType</>
+ usually has the same value as <structfield>attType</> but if
+ it's different then optional method <function>compress</>
+ should be provided. Method <function>compress</> is responsible
+ for transformation from <structfield>attType</> to
+ <structfield>leafType</>. In this case all other function should
+ accept <structfield>leafType</> values. Note: both consistent
+ functions will get <structfield>scankeys</> unchanged, without
+ <function>compress</> transformation.
</para>
</listitem>
</varlistentry>
@@ -690,6 +700,24 @@ typedef struct spgLeafConsistentOut
</varlistentry>
</variablelist>
+ <para>
+ The optional user-defined method is:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><function>Datum compress(Datum in)</></term>
+ <listitem>
+ <para>
+ Converts the data item into a format suitable for physical storage in
+ an index page. It accepts <structname>spgConfigIn</>.<structfield>attType</>
+ value and return <structname>spgConfigOut</>.<structfield>leafType</>
+ value. Output value should not be toasted.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
<para>
All the SP-GiST support methods are normally called in a short-lived
memory context; that is, <varname>CurrentMemoryContext</> will be reset
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 1a17cc4..06c0680 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -1854,21 +1854,36 @@ spgdoinsert(Relation index, SpGistState *state,
FmgrInfo *procinfo = NULL;
/*
- * Look up FmgrInfo of the user-defined choose function once, to save
- * cycles in the loop below.
+ * Prepare the leaf datum to insert.
+ *
+ * If there is an optional "compress" method, call it to form the leaf
+ * datum from the input datum. Otherwise we will store the input datum as
+ * is. (We have to detoast it, though. We assume the "compress" method to
+ * return an untoasted value.)
*/
if (!isnull)
- procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
+ {
+ if (OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+ {
+ procinfo = index_getprocinfo(index, 1, SPGIST_COMPRESS_PROC);
+ leafDatum = FunctionCall1Coll(procinfo,
+ index->rd_indcollation[0],
+ datum);
+ }
+ else if (state->attLeafType.attlen == -1)
+ leafDatum = PointerGetDatum(PG_DETOAST_DATUM(datum));
+ else
+ leafDatum = datum;
+ }
+ else
+ leafDatum = (Datum) 0;
/*
- * Since we don't use index_form_tuple in this AM, we have to make sure
- * value to be inserted is not toasted; FormIndexDatum doesn't guarantee
- * that.
+ * Look up FmgrInfo of the user-defined choose function once, to save
+ * cycles in the loop below.
*/
- if (!isnull && state->attType.attlen == -1)
- datum = PointerGetDatum(PG_DETOAST_DATUM(datum));
-
- leafDatum = datum;
+ if (!isnull)
+ procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
/*
* Compute space needed for a leaf tuple containing the given datum.
@@ -1878,7 +1893,7 @@ spgdoinsert(Relation index, SpGistState *state,
*/
if (!isnull)
leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
- SpGistGetTypeSize(&state->attType, leafDatum);
+ SpGistGetTypeSize(&state->attLeafType, leafDatum);
else
leafSize = SGDTSIZE + sizeof(ItemIdData);
@@ -2093,7 +2108,7 @@ spgdoinsert(Relation index, SpGistState *state,
{
leafDatum = out.result.matchNode.restDatum;
leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
- SpGistGetTypeSize(&state->attType, leafDatum);
+ SpGistGetTypeSize(&state->attLeafType, leafDatum);
}
/*
diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c
index 35cc41b..a4c5592 100644
--- a/src/backend/access/spgist/spgscan.c
+++ b/src/backend/access/spgist/spgscan.c
@@ -39,7 +39,8 @@ typedef struct ScanStackEntry
static void
freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry)
{
- if (!so->state.attType.attbyval &&
+ /* FIXME: Is attLeafType correct for reconstructedValue? */
+ if (!so->state.attLeafType.attbyval &&
DatumGetPointer(stackEntry->reconstructedValue) != NULL)
pfree(DatumGetPointer(stackEntry->reconstructedValue));
pfree(stackEntry);
@@ -539,11 +540,14 @@ redirect:
else
newEntry->level = stackEntry->level;
/* Must copy value out of temp context */
+ /*
+ * FIXME: this assumes that the leaf data type is the same
+ * as the reconstructedValues datatype */
if (out.reconstructedValues)
newEntry->reconstructedValue =
datumCopy(out.reconstructedValues[i],
- so->state.attType.attbyval,
- so->state.attType.attlen);
+ so->state.attLeafType.attbyval,
+ so->state.attLeafType.attlen);
else
newEntry->reconstructedValue = (Datum) 0;
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 1a224ef..dcbb30c 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -74,7 +74,21 @@ spgGetCache(Relation index)
PointerGetDatum(&cache->config));
/* Get the information we need about each relevant datatype */
- fillTypeDesc(&cache->attType, atttype);
+ if (OidIsValid(cache->config.leafType) &&
+ cache->config.leafType != atttype)
+ {
+ if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("compress method must not defined when leaf type is different from input type")));
+
+ fillTypeDesc(&cache->attLeafType, cache->config.leafType);
+ }
+ else
+ {
+ fillTypeDesc(&cache->attLeafType, atttype);
+ }
+
fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
fillTypeDesc(&cache->attLabelType, cache->config.labelType);
@@ -113,7 +127,7 @@ initSpGistState(SpGistState *state, Relation index)
cache = spgGetCache(index);
state->config = cache->config;
- state->attType = cache->attType;
+ state->attLeafType = cache->attLeafType;
state->attPrefixType = cache->attPrefixType;
state->attLabelType = cache->attLabelType;
@@ -556,7 +570,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
/* compute space needed (note result is already maxaligned) */
size = SGLTHDRSZ;
if (!isnull)
- size += SpGistGetTypeSize(&state->attType, datum);
+ size += SpGistGetTypeSize(&state->attLeafType, datum);
/*
* Ensure that we can replace the tuple with a dead tuple later. This
@@ -572,7 +586,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
tup->nextOffset = InvalidOffsetNumber;
tup->heapPtr = *heapPtr;
if (!isnull)
- memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum);
+ memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
return tup;
}
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index 3aa96bd..bbf6d89 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -30,7 +30,8 @@
#define SPGIST_PICKSPLIT_PROC 3
#define SPGIST_INNER_CONSISTENT_PROC 4
#define SPGIST_LEAF_CONSISTENT_PROC 5
-#define SPGISTNProc 5
+#define SPGIST_COMPRESS_PROC 6
+#define SPGISTNProc 6
/*
* Argument structs for spg_config method
@@ -44,6 +45,7 @@ typedef struct spgConfigOut
{
Oid prefixType; /* Data type of inner-tuple prefixes */
Oid labelType; /* Data type of inner-tuple node labels */
+ Oid leafType; /* Data type of leaf (type of SPGIST_COMPRESS_PROC output) */
bool canReturnData; /* Opclass can reconstruct original data */
bool longValuesOK; /* Opclass can cope with values > 1 page */
} spgConfigOut;
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index 4b6fdee..8ae7e75 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -115,13 +115,13 @@ typedef struct SpGistTypeDesc
typedef struct SpGistState
{
- spgConfigOut config; /* filled in by opclass config method */
+ spgConfigOut config; /* filled in by opclass config method */
- SpGistTypeDesc attType; /* type of input data and leaf values */
- SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */
+ SpGistTypeDesc attLeafType; /* type of leaf values */
+ SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */
SpGistTypeDesc attLabelType; /* type of node label values */
- char *deadTupleStorage; /* workspace for spgFormDeadTuple */
+ char *deadTupleStorage; /* workspace for spgFormDeadTuple */
TransactionId myXid; /* XID to use when creating a redirect tuple */
bool isBuild; /* true if doing index build */
@@ -174,13 +174,13 @@ typedef SpGistScanOpaqueData *SpGistScanOpaque;
*/
typedef struct SpGistCache
{
- spgConfigOut config; /* filled in by opclass config method */
+ spgConfigOut config; /* filled in by opclass config method */
- SpGistTypeDesc attType; /* type of input data and leaf values */
- SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */
+ SpGistTypeDesc attLeafType; /* type of leaf values */
+ SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */
SpGistTypeDesc attLabelType; /* type of node label values */
- SpGistLUPCache lastUsedPages; /* local storage of last-used info */
+ SpGistLUPCache lastUsedPages; /* local storage of last-used info */
} SpGistCache;
@@ -298,7 +298,7 @@ typedef SpGistLeafTupleData *SpGistLeafTuple;
#define SGLTHDRSZ MAXALIGN(sizeof(SpGistLeafTupleData))
#define SGLTDATAPTR(x) (((char *) (x)) + SGLTHDRSZ)
-#define SGLTDATUM(x, s) ((s)->attType.attbyval ? \
+#define SGLTDATUM(x, s) ((s)->attLeafType.attbyval ? \
*(Datum *) SGLTDATAPTR(x) : \
PointerGetDatum(SGLTDATAPTR(x)))
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index 67b57cd..6427552 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -129,7 +129,7 @@ DESCR("GiST index access method");
DATA(insert OID = 2742 ( gin 0 6 f f f f t t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
-DATA(insert OID = 4000 ( spgist 0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
+DATA(insert OID = 4000 ( spgist 0 6 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
DESCR("SP-GiST index access method");
#define SPGIST_AM_OID 4000
DATA(insert OID = 3580 ( brin 5 14 f f f f t t f t t f f 0 brininsert brinbeginscan - bringetbitmap brinrescan brinendscan brinmarkpos brinrestrpos brinbuild brinbuildempty brinbulkdelete brinvacuumcleanup - brincostestimate brinoptions ));
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers