Hi, Please find attached the next version of the extensible toast support. There basically are two changes:
* handle indirect toast tuples properly in heap_tuple_fetch_attr and related places * minor comment adjustments Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
>From 5c7079067f870a65d65ee09cccbf743c88b64c82 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Tue, 11 Jun 2013 23:25:26 +0200 Subject: [PATCH] Add support for multiple kinds of external toast datums There are several usecases where our current representation of external toast datums is limiting: * adding new compression schemes * avoidance of repeated detoasting * externally decoded toast tuples For that support 'tags' on external (varattrib_1b_e) varlenas which recoin the current va_len_1be field to store the tag (or type) of a varlena. To determine the actual length a macro VARTAG_SIZE(tag) is added which can be used to map from a tag to the actual length. This patch adds support for 'indirect' tuples which point to some externally allocated memory containing a toast tuple. It also implements the stub for a different compression algorithm. --- src/backend/access/heap/tuptoaster.c | 142 +++++++++++++++++++++++++++++++---- src/include/c.h | 2 + src/include/postgres.h | 84 +++++++++++++++------ 3 files changed, 191 insertions(+), 37 deletions(-) diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index fc37ceb..5ee5e33 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -87,11 +87,11 @@ static struct varlena *toast_fetch_datum_slice(struct varlena * attr, * heap_tuple_fetch_attr - * * Public entry point to get back a toasted value from - * external storage (possibly still in compressed format). + * external source (possibly still in compressed format). * * This will return a datum that contains all the data internally, ie, not - * relying on external storage, but it can still be compressed or have a short - * header. + * relying on external storage or memory, but it can still be compressed or + * have a short header. ---------- */ struct varlena * @@ -99,13 +99,35 @@ heap_tuple_fetch_attr(struct varlena * attr) { struct varlena *result; - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* * This is an external stored plain value */ result = toast_fetch_datum(attr); } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + /* + * copy into the caller's memory context. That's not required in all + * cases but sufficient for now since this is mainly used when we need + * to persist a Datum for unusually long time, like in a HOLD cursor. + */ + struct varatt_indirect redirect; + VARATT_EXTERNAL_GET_POINTER(redirect, attr); + attr = (struct varlena *)redirect.pointer; + + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + + /* doesn't make much sense, but better handle it */ + if (VARATT_IS_EXTERNAL_ONDISK(attr)) + return heap_tuple_fetch_attr(attr); + + /* copy datum verbatim */ + result = (struct varlena *) palloc(VARSIZE_ANY(attr)); + memcpy(result, attr, VARSIZE_ANY(attr)); + } else { /* @@ -128,7 +150,7 @@ heap_tuple_fetch_attr(struct varlena * attr) struct varlena * heap_tuple_untoast_attr(struct varlena * attr) { - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* * This is an externally stored datum --- fetch it back from there @@ -145,6 +167,17 @@ heap_tuple_untoast_attr(struct varlena * attr) pfree(tmp); } } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect redirect; + VARATT_EXTERNAL_GET_POINTER(redirect, attr); + attr = (struct varlena *)redirect.pointer; + + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + + attr = heap_tuple_untoast_attr(attr); + } else if (VARATT_IS_COMPRESSED(attr)) { /* @@ -191,7 +224,7 @@ heap_tuple_untoast_attr_slice(struct varlena * attr, char *attrdata; int32 attrsize; - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; @@ -204,6 +237,17 @@ heap_tuple_untoast_attr_slice(struct varlena * attr, /* fetch it back (compressed marker will get set automatically) */ preslice = toast_fetch_datum(attr); } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect redirect; + VARATT_EXTERNAL_GET_POINTER(redirect, attr); + + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer)); + + return heap_tuple_untoast_attr_slice(redirect.pointer, + sliceoffset, slicelength); + } else preslice = attr; @@ -267,7 +311,7 @@ toast_raw_datum_size(Datum value) struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* va_rawsize is the size of the original datum -- including header */ struct varatt_external toast_pointer; @@ -275,6 +319,16 @@ toast_raw_datum_size(Datum value) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_rawsize; } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect toast_pointer; + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer)); + + return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer)); + } else if (VARATT_IS_COMPRESSED(attr)) { /* here, va_rawsize is just the payload size */ @@ -308,7 +362,7 @@ toast_datum_size(Datum value) struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* * Attribute is stored externally - return the extsize whether @@ -320,6 +374,16 @@ toast_datum_size(Datum value) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_extsize; } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect toast_pointer; + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + + return toast_datum_size(PointerGetDatum(toast_pointer.pointer)); + } else if (VARATT_IS_SHORT(attr)) { result = VARSIZE_SHORT(attr); @@ -387,12 +451,56 @@ toast_delete(Relation rel, HeapTuple oldtup) { Datum value = toast_values[i]; - if (!toast_isnull[i] && VARATT_IS_EXTERNAL(PointerGetDatum(value))) + if (toast_isnull[i]) + continue; + else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value))) toast_delete_datum(rel, value); + else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value))) + elog(ERROR, "deleteing a tuple indirect datums doesn't make sense"); } } } +/* ---------- + * toast_datum_differs - + * + * Determine whether two toasted Datums are the same and don't have to be + * stored again. + * ---------- + */ +static bool +toast_datum_differs(struct varlena *old_value, struct varlena *new_value) +{ + Assert(VARATT_IS_EXTERNAL(old_value)); + Assert(VARATT_IS_EXTERNAL(new_value)); + + /* fast path for the common case where we have the toast oid available */ + if (VARATT_IS_EXTERNAL_ONDISK(old_value) && + VARATT_IS_EXTERNAL_ONDISK(new_value)) + return memcmp((char *) old_value, (char *) new_value, + VARSIZE_EXTERNAL(old_value)) != 0; + + /* + * compare size of tuples, so we don't uselessly detoast/decompress tuples + * if they can't be the same anyway. + */ + if (toast_raw_datum_size(PointerGetDatum(old_value)) != + toast_raw_datum_size(PointerGetDatum(new_value))) + return false; + + old_value = heap_tuple_untoast_attr(old_value); + new_value = heap_tuple_untoast_attr(new_value); + + Assert(!VARATT_IS_EXTERNAL(old_value)); + Assert(!VARATT_IS_EXTERNAL(new_value)); + Assert(!VARATT_IS_COMPRESSED(old_value)); + Assert(!VARATT_IS_COMPRESSED(new_value)); + Assert(VARSIZE_ANY_EXHDR(old_value) == VARSIZE_ANY_EXHDR(new_value)); + + /* compare payload, we're fine with unaligned data */ + return memcmp(VARDATA_ANY(old_value), VARDATA_ANY(new_value), + VARSIZE_ANY_EXHDR(old_value)) != 0; +} /* ---------- * toast_insert_or_update - @@ -497,8 +605,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, VARATT_IS_EXTERNAL(old_value)) { if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) || - memcmp((char *) old_value, (char *) new_value, - VARSIZE_EXTERNAL(old_value)) != 0) + toast_datum_differs(old_value, new_value)) { /* * The old external stored value isn't needed any more @@ -1258,6 +1365,8 @@ toast_save_datum(Relation rel, Datum value, int32 data_todo; Pointer dval = DatumGetPointer(value); + Assert(!VARATT_IS_EXTERNAL(value)); + /* * Open the toast relation and its index. We can use the index to check * uniqueness of the OID we assign to the toasted item, even though it has @@ -1341,7 +1450,7 @@ toast_save_datum(Relation rel, Datum value, { struct varatt_external old_toast_pointer; - Assert(VARATT_IS_EXTERNAL(oldexternal)); + Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal)); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal); if (old_toast_pointer.va_toastrelid == rel->rd_toastoid) @@ -1456,7 +1565,7 @@ toast_save_datum(Relation rel, Datum value, * Create the TOAST pointer value that we'll return */ result = (struct varlena *) palloc(TOAST_POINTER_SIZE); - SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE); + SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK); memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer)); return PointerGetDatum(result); @@ -1480,7 +1589,7 @@ toast_delete_datum(Relation rel, Datum value) SysScanDesc toastscan; HeapTuple toasttup; - if (!VARATT_IS_EXTERNAL(attr)) + if (!VARATT_IS_EXTERNAL_ONDISK(attr)) return; /* Must copy to access aligned fields */ @@ -1608,6 +1717,9 @@ toast_fetch_datum(struct varlena * attr) char *chunkdata; int32 chunksize; + if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + elog(ERROR, "shouldn't be called for indirect tuples"); + /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); @@ -1775,7 +1887,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length) int32 chcpystrt; int32 chcpyend; - Assert(VARATT_IS_EXTERNAL(attr)); + Assert(VARATT_IS_EXTERNAL_ONDISK(attr)); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); diff --git a/src/include/c.h b/src/include/c.h index f2c9e12..7193af6 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -573,6 +573,8 @@ typedef NameData *Name; #define AssertMacro(condition) ((void)true) #define AssertArg(condition) #define AssertState(condition) +#define TrapMacro(condition, errorType) (true) + #elif defined(FRONTEND) #include <assert.h> diff --git a/src/include/postgres.h b/src/include/postgres.h index 30e1dee..d57569f 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -54,23 +54,53 @@ */ /* - * struct varatt_external is a "TOAST pointer", that is, the information - * needed to fetch a stored-out-of-line Datum. The data is compressed - * if and only if va_extsize < va_rawsize - VARHDRSZ. This struct must not - * contain any padding, because we sometimes compare pointers using memcmp. + * struct varatt_external is a "TOAST pointer", that is, the information needed + * to fetch a Datum stored in an out-of-line on-disk Datum. The data is + * compressed if and only if va_extsize < va_rawsize - VARHDRSZ. This struct + * must not contain any padding, because we sometimes compare pointers using + * memcmp. * * Note that this information is stored unaligned within actual tuples, so * you need to memcpy from the tuple into a local struct variable before * you can look at these fields! (The reason we use memcmp is to avoid * having to do that just to detect equality of two TOAST pointers...) */ -struct varatt_external +typedef struct varatt_external { int32 va_rawsize; /* Original data size (includes header) */ int32 va_extsize; /* External saved size (doesn't) */ Oid va_valueid; /* Unique ID of value within TOAST table */ Oid va_toastrelid; /* RelID of TOAST table containing it */ -}; +} varatt_external; + +/* + * Out-of-line Datum thats stored in memory in contrast to varatt_external + * pointers which points to data in an external toast relation. + * + * Note that just as varatt_external's the redirection itsel is stored + * unaligned within the tuple. + */ +typedef struct varatt_indirect +{ + struct varlena *pointer; /* Pointer to in-memory varlena */ +} varatt_indirect; + + +/* + * Type of external toast datum stored. The peculiar value for VARTAG_ONDISK + * comes from the requirement for on-disk compatibility with the older + * definitions of varattrib_1b_e where v_tag was named va_len_1be... + */ +typedef enum vartag_external +{ + VARTAG_INDIRECT = 1, + VARTAG_ONDISK = 18 +} vartag_external; + +#define VARTAG_SIZE(tag) \ + ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ + (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \ + TrapMacro(false, "unknown vartag")) /* * These structs describe the header of a varlena object that may have been @@ -102,11 +132,12 @@ typedef struct char va_data[1]; /* Data begins here */ } varattrib_1b; +/* inline portion of a short varlena pointing to an external resource */ typedef struct { uint8 va_header; /* Always 0x80 or 0x01 */ - uint8 va_len_1be; /* Physical length of datum */ - char va_data[1]; /* Data (for now always a TOAST pointer) */ + uint8 va_tag; /* Type of datum */ + char va_data[1]; /* Data (of the type indicated by va_tag) */ } varattrib_1b_e; /* @@ -130,6 +161,9 @@ typedef struct * first byte. Also, it is not possible for a 1-byte length word to be zero; * this lets us disambiguate alignment padding bytes from the start of an * unaligned datum. (We now *require* pad bytes to be filled with zero!) + * + * In TOAST datums the tag field in varattrib_1b_e is used to discern whether + * its an indirection pointer or more commonly an on-disk tuple. */ /* @@ -161,8 +195,8 @@ typedef struct (((varattrib_4b *) (PTR))->va_4byte.va_header & 0x3FFFFFFF) #define VARSIZE_1B(PTR) \ (((varattrib_1b *) (PTR))->va_header & 0x7F) -#define VARSIZE_1B_E(PTR) \ - (((varattrib_1b_e *) (PTR))->va_len_1be) +#define VARTAG_1B_E(PTR) \ + (((varattrib_1b_e *) (PTR))->va_tag) #define SET_VARSIZE_4B(PTR,len) \ (((varattrib_4b *) (PTR))->va_4byte.va_header = (len) & 0x3FFFFFFF) @@ -170,9 +204,9 @@ typedef struct (((varattrib_4b *) (PTR))->va_4byte.va_header = ((len) & 0x3FFFFFFF) | 0x40000000) #define SET_VARSIZE_1B(PTR,len) \ (((varattrib_1b *) (PTR))->va_header = (len) | 0x80) -#define SET_VARSIZE_1B_E(PTR,len) \ +#define SET_VARTAG_1B_E(PTR,tag) \ (((varattrib_1b_e *) (PTR))->va_header = 0x80, \ - ((varattrib_1b_e *) (PTR))->va_len_1be = (len)) + ((varattrib_1b_e *) (PTR))->va_tag = (tag)) #else /* !WORDS_BIGENDIAN */ #define VARATT_IS_4B(PTR) \ @@ -193,8 +227,8 @@ typedef struct ((((varattrib_4b *) (PTR))->va_4byte.va_header >> 2) & 0x3FFFFFFF) #define VARSIZE_1B(PTR) \ ((((varattrib_1b *) (PTR))->va_header >> 1) & 0x7F) -#define VARSIZE_1B_E(PTR) \ - (((varattrib_1b_e *) (PTR))->va_len_1be) +#define VARTAG_1B_E(PTR) \ + (((varattrib_1b_e *) (PTR))->va_tag) #define SET_VARSIZE_4B(PTR,len) \ (((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2)) @@ -202,12 +236,12 @@ typedef struct (((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2) | 0x02) #define SET_VARSIZE_1B(PTR,len) \ (((varattrib_1b *) (PTR))->va_header = (((uint8) (len)) << 1) | 0x01) -#define SET_VARSIZE_1B_E(PTR,len) \ +#define SET_VARTAG_1B_E(PTR,tag) \ (((varattrib_1b_e *) (PTR))->va_header = 0x01, \ - ((varattrib_1b_e *) (PTR))->va_len_1be = (len)) + ((varattrib_1b_e *) (PTR))->va_tag = (tag)) #endif /* WORDS_BIGENDIAN */ -#define VARHDRSZ_SHORT 1 +#define VARHDRSZ_SHORT offsetof(varattrib_1b, va_data) #define VARATT_SHORT_MAX 0x7F #define VARATT_CAN_MAKE_SHORT(PTR) \ (VARATT_IS_4B_U(PTR) && \ @@ -215,7 +249,7 @@ typedef struct #define VARATT_CONVERTED_SHORT_SIZE(PTR) \ (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) -#define VARHDRSZ_EXTERNAL 2 +#define VARHDRSZ_EXTERNAL offsetof(varattrib_1b_e, va_data) #define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_4byte.va_data) #define VARDATA_4B_C(PTR) (((varattrib_4b *) (PTR))->va_compressed.va_data) @@ -249,26 +283,32 @@ typedef struct #define VARSIZE_SHORT(PTR) VARSIZE_1B(PTR) #define VARDATA_SHORT(PTR) VARDATA_1B(PTR) -#define VARSIZE_EXTERNAL(PTR) VARSIZE_1B_E(PTR) +#define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR) +#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR))) #define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR) #define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR) #define VARATT_IS_EXTERNAL(PTR) VARATT_IS_1B_E(PTR) +#define VARATT_IS_EXTERNAL_ONDISK(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK) +#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT) #define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR) #define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR)) #define SET_VARSIZE(PTR, len) SET_VARSIZE_4B(PTR, len) #define SET_VARSIZE_SHORT(PTR, len) SET_VARSIZE_1B(PTR, len) #define SET_VARSIZE_COMPRESSED(PTR, len) SET_VARSIZE_4B_C(PTR, len) -#define SET_VARSIZE_EXTERNAL(PTR, len) SET_VARSIZE_1B_E(PTR, len) + +#define SET_VARTAG_EXTERNAL(PTR, tag) SET_VARTAG_1B_E(PTR, tag) #define VARSIZE_ANY(PTR) \ - (VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR) : \ + (VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR) : \ (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR) : \ VARSIZE_4B(PTR))) #define VARSIZE_ANY_EXHDR(PTR) \ - (VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR)-VARHDRSZ_EXTERNAL : \ + (VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR)-VARHDRSZ_EXTERNAL : \ (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR)-VARHDRSZ_SHORT : \ VARSIZE_4B(PTR)-VARHDRSZ)) -- 1.8.3.1
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers