Hi, As pointed out in [1], BRIN is not properly handling toasted data, which may easily lead to index tuples referencing TOAST-ed values. Which is clearly wrong - it's trivial to trigger failues after a DELETE.
Attached is a patch that aims to fix this - AFAIK the brin_form_tuple
was simply missing the TOAST_INDEX_HACK stuff from index_form_tuple,
which ensures the data is detoasted and (possibly) re-compressed. The
code is mostly the same, with some BRIN-specific tweaks (looking at
oi_typecache instead of the index descriptor, etc.).
I also attach a simple SQL script that I used to trigger the issue. This
needs to be turned into a regression test, I'll work on that tomorrow.
A separate question is what to do about existing indexes - ISTM the only
thing we can do is to tell the users to reindex all BRIN indexes on
varlena values. Something like this:
select * from pg_class
where relam = (select oid from pg_am where amname = 'brin')
and oid in (select attrelid from pg_attribute where attlen = -1
and attstorage in ('e', 'x'));
regards
[1]
https://www.postgresql.org/message-id/20201001184133.oq5uq75sb45pu3aw%40development
--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/access/brin/brin_tuple.c
b/src/backend/access/brin/brin_tuple.c
index 46e6b23c87..96eab9e87c 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -38,6 +38,17 @@
#include "utils/datum.h"
#include "utils/memutils.h"
+#include "access/detoast.h"
+#include "access/heaptoast.h"
+#include "access/toast_internals.h"
+
+/*
+ * This enables de-toasting of index entries. Needed until VACUUM is
+ * smart enough to rebuild indexes from scratch.
+ */
+#define TOAST_INDEX_HACK
+
+
static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
char *tp, bits8 *nullbits, bool nulls,
Datum *values, bool *allnulls, bool *hasnulls);
@@ -99,6 +110,12 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno,
BrinMemTuple *tuple,
Size len,
hoff,
data_len;
+ int i;
+
+#ifdef TOAST_INDEX_HACK
+ Datum *untoasted_values;
+ int nuntoasted = 0;
+#endif
Assert(brdesc->bd_totalstored > 0);
@@ -107,6 +124,10 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno,
BrinMemTuple *tuple,
phony_nullbitmap = (bits8 *)
palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
+#ifdef TOAST_INDEX_HACK
+ untoasted_values = (Datum *) palloc(sizeof(Datum) *
brdesc->bd_totalstored);
+#endif
+
/*
* Set up the values/nulls arrays for heap_fill_tuple
*/
@@ -141,7 +162,75 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno,
BrinMemTuple *tuple,
for (datumno = 0;
datumno < brdesc->bd_info[keyno]->oi_nstored;
datumno++)
- values[idxattno++] =
tuple->bt_columns[keyno].bv_values[datumno];
+ {
+ Datum value =
tuple->bt_columns[keyno].bv_values[datumno];
+
+#ifdef TOAST_INDEX_HACK
+
+ /* We must look at the stored type, not at the index
descriptor. */
+ TypeCacheEntry *atttype =
brdesc->bd_info[keyno]->oi_typcache[datumno];
+
+ /* Do we need to free the value at the end? */
+ bool free_value = false;
+
+ /* For non-varlena types we don't need to do anything
special */
+ if (atttype->typlen != -1)
+ {
+ values[idxattno++] = value;
+ continue;
+ }
+
+ /*
+ * Do nothing if value is not of varlena type. We don't
need to
+ * care about NULL values here, thanks to bv_allnulls
above.
+ *
+ * If value is stored EXTERNAL, must fetch it so we are
not
+ * depending on outside storage.
+ *
+ * XXX Is this actually true? Could it be that the
summary is
+ * NULL even for range with non-NULL data? E.g.
degenerate bloom
+ * filter may be thrown away, etc.
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(value)))
+ {
+ value =
PointerGetDatum(detoast_external_attr((struct varlena *)
+
DatumGetPointer(value)));
+ free_value = true;
+ }
+
+ /*
+ * If value is above size target, and is of a
compressible datatype,
+ * try to compress it in-line.
+ */
+ if (!VARATT_IS_EXTENDED(DatumGetPointer(value)) &&
+ VARSIZE(DatumGetPointer(value)) >
TOAST_INDEX_TARGET &&
+ (atttype->typstorage == TYPSTORAGE_EXTENDED ||
+ atttype->typstorage == TYPSTORAGE_MAIN))
+ {
+ Datum cvalue =
toast_compress_datum(value);
+
+ if (DatumGetPointer(cvalue) != NULL)
+ {
+ /* successful compression */
+ if (free_value)
+ pfree(DatumGetPointer(value));
+
+ value = cvalue;
+ free_value = true;
+ }
+ }
+
+ /*
+ * If we untoasted / compressed the value, we need to
free it
+ * after forming the index tuple.
+ */
+ if (free_value)
+ untoasted_values[nuntoasted++] = value;
+
+#endif
+
+ values[idxattno++] = value;
+ }
}
/* Assert we did not overrun temp arrays */
@@ -193,6 +282,11 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno,
BrinMemTuple *tuple,
pfree(nulls);
pfree(phony_nullbitmap);
+#ifdef TOAST_INDEX_HACK
+ for (i = 0; i < nuntoasted; i++)
+ pfree(DatumGetPointer(untoasted_values[i]));
+#endif
+
/*
* Now fill in the real null bitmasks. allnulls first.
*/
brin-test.sql
Description: application/sql
