Hi,

As pointed out in [1], BRIN is not properly handling toasted data, which
may easily lead to index tuples referencing TOAST-ed values. Which is
clearly wrong - it's trivial to trigger failues after a DELETE.

Attached is a patch that aims to fix this - AFAIK the brin_form_tuple
was simply missing the TOAST_INDEX_HACK stuff from index_form_tuple,
which ensures the data is detoasted and (possibly) re-compressed. The
code is mostly the same, with some BRIN-specific tweaks (looking at
oi_typecache instead of the index descriptor, etc.).

I also attach a simple SQL script that I used to trigger the issue. This
needs to be turned into a regression test, I'll work on that tomorrow.


A separate question is what to do about existing indexes - ISTM the only
thing we can do is to tell the users to reindex all BRIN indexes on
varlena values. Something like this:

    select * from pg_class
     where relam = (select oid from pg_am where amname = 'brin')
       and oid in (select attrelid from pg_attribute where attlen = -1
                                    and attstorage in ('e', 'x'));


regards


[1] 
https://www.postgresql.org/message-id/20201001184133.oq5uq75sb45pu3aw%40development

--
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/access/brin/brin_tuple.c 
b/src/backend/access/brin/brin_tuple.c
index 46e6b23c87..96eab9e87c 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -38,6 +38,17 @@
 #include "utils/datum.h"
 #include "utils/memutils.h"
 
+#include "access/detoast.h"
+#include "access/heaptoast.h"
+#include "access/toast_internals.h"
+
+/*
+ * This enables de-toasting of index entries.  Needed until VACUUM is
+ * smart enough to rebuild indexes from scratch.
+ */
+#define TOAST_INDEX_HACK
+
+
 static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
                                                                                
  char *tp, bits8 *nullbits, bool nulls,
                                                                                
  Datum *values, bool *allnulls, bool *hasnulls);
@@ -99,6 +110,12 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, 
BrinMemTuple *tuple,
        Size            len,
                                hoff,
                                data_len;
+       int                     i;
+
+#ifdef TOAST_INDEX_HACK
+       Datum      *untoasted_values;
+       int                     nuntoasted = 0;
+#endif
 
        Assert(brdesc->bd_totalstored > 0);
 
@@ -107,6 +124,10 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, 
BrinMemTuple *tuple,
        phony_nullbitmap = (bits8 *)
                palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
 
+#ifdef TOAST_INDEX_HACK
+       untoasted_values = (Datum *) palloc(sizeof(Datum) * 
brdesc->bd_totalstored);
+#endif
+
        /*
         * Set up the values/nulls arrays for heap_fill_tuple
         */
@@ -141,7 +162,75 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, 
BrinMemTuple *tuple,
                for (datumno = 0;
                         datumno < brdesc->bd_info[keyno]->oi_nstored;
                         datumno++)
-                       values[idxattno++] = 
tuple->bt_columns[keyno].bv_values[datumno];
+               {
+                       Datum value = 
tuple->bt_columns[keyno].bv_values[datumno];
+
+#ifdef TOAST_INDEX_HACK
+
+                       /* We must look at the stored type, not at the index 
descriptor. */
+                       TypeCacheEntry  *atttype = 
brdesc->bd_info[keyno]->oi_typcache[datumno];
+
+                       /* Do we need to free the value at the end? */
+                       bool free_value = false;
+
+                       /* For non-varlena types we don't need to do anything 
special */
+                       if (atttype->typlen != -1)
+                       {
+                               values[idxattno++] = value;
+                               continue;
+                       }
+
+                       /*
+                        * Do nothing if value is not of varlena type. We don't 
need to
+                        * care about NULL values here, thanks to bv_allnulls 
above.
+                        *
+                        * If value is stored EXTERNAL, must fetch it so we are 
not
+                        * depending on outside storage.
+                        *
+                        * XXX Is this actually true? Could it be that the 
summary is
+                        * NULL even for range with non-NULL data? E.g. 
degenerate bloom
+                        * filter may be thrown away, etc.
+                        */
+                       if (VARATT_IS_EXTERNAL(DatumGetPointer(value)))
+                       {
+                               value = 
PointerGetDatum(detoast_external_attr((struct varlena *)
+                                                                               
                                          DatumGetPointer(value)));
+                               free_value = true;
+                       }
+
+                       /*
+                        * If value is above size target, and is of a 
compressible datatype,
+                        * try to compress it in-line.
+                        */
+                       if (!VARATT_IS_EXTENDED(DatumGetPointer(value)) &&
+                               VARSIZE(DatumGetPointer(value)) > 
TOAST_INDEX_TARGET &&
+                               (atttype->typstorage == TYPSTORAGE_EXTENDED ||
+                                atttype->typstorage == TYPSTORAGE_MAIN))
+                       {
+                               Datum           cvalue = 
toast_compress_datum(value);
+
+                               if (DatumGetPointer(cvalue) != NULL)
+                               {
+                                       /* successful compression */
+                                       if (free_value)
+                                               pfree(DatumGetPointer(value));
+
+                                       value = cvalue;
+                                       free_value = true;
+                               }
+                       }
+
+                       /*
+                        * If we untoasted / compressed the value, we need to 
free it
+                        * after forming the index tuple.
+                        */
+                       if (free_value)
+                               untoasted_values[nuntoasted++] = value;
+
+#endif
+
+                       values[idxattno++] = value;
+               }
        }
 
        /* Assert we did not overrun temp arrays */
@@ -193,6 +282,11 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, 
BrinMemTuple *tuple,
        pfree(nulls);
        pfree(phony_nullbitmap);
 
+#ifdef TOAST_INDEX_HACK
+       for (i = 0; i < nuntoasted; i++)
+               pfree(DatumGetPointer(untoasted_values[i]));
+#endif
+
        /*
         * Now fill in the real null bitmasks.  allnulls first.
         */

Attachment: brin-test.sql
Description: application/sql

Reply via email to