> Or maybe I just don't understand the proposal. Perhaps it'd be best if jian wrote a patch illustrating the idea, and showing how it performs compared to the current approach.
Currently Joel's idea is an int4hashset, based on the code Tomas first wrote. It looks like a non-nested collection of unique int4 values. The external text format looks like {int4, int4, int4}; the structure looks like (header + capacity slots * int4). Within the capacity slots, some slots are empty and some hold unique values. The textual int4hashset looks like a one-dimensional array, so I copied/imitated the code in src/backend/utils/adt/arrayfuncs.c and wrote slightly more generic hashset input and output functions; see the attached C file. It works fine for non-null input and output for {int4hashset, int8hashset, timestamphashset, intervalhashset, uuidhashset}.
/*
 * Build:
 *
 * gcc -I/home/jian/postgres/2023_05_25_beta5421/include/server -fPIC -c /home/jian/Desktop/regress_pgsql/set.c
 * gcc -shared -o /home/jian/Desktop/regress_pgsql/set.so /home/jian/Desktop/regress_pgsql/set.o
 *
 * Install:
 *
 * CREATE OR REPLACE FUNCTION set_in_out_test(cstring,int, int) RETURNS BOOL SET search_path from current
 *     AS '/home/jian/Desktop/regress_pgsql/set', 'set_in_out_test'
 *     LANGUAGE C IMMUTABLE;
 *
 * Cases expected to parse:
 *
 * select set_in_out_test('{111,-0,+0,-2147483648,2147483647}',23,-1);
 * select set_in_out_test('{}',23,-1);
 * select set_in_out_test('{-1111111,-9223372036854775808,9223372036854775807,-0,+0}',20,-1);
 * select set_in_out_test('{2022-01-01T11:21:21.741077 +05:30, 2022-01-01T11:21:21.741076 +05:30,-infinity,+infinity}',1114,-1);
 * select set_in_out_test('{"1hour", " 0:00","-0:00","2000:10:01.23456789"}',1186,-1);
 * select set_in_out_test('{03aadb61-3e30-4112-a46a-8cd72f29876b,2c1e4b2c-b8f4-470a-843f-dd094e4743a6 ,ef1a3202-a84f-4321-ab3c-45e04bf4c42d,2c1e4b2c-b8f4-470a-843f-dd094e4743a6}',2950,-1);
 *
 * -----------parse fail cases.
 *
 * select set_in_out_test('{{-1111111,-9223372036854775808,9223372036854775807,-0,+0}}',20,-1);
 * select set_in_out_test('{-1111111,-9223372036854775808,9223372036854775807,-0,+0.1}',20,-1);
 * select set_in_out_test('{-1111111,-9223372036854775808,9223372036854775808,-0,+0.1}',20,-1);
 * select set_in_out_test('{2022-01-01T11:21:21.7410as +05:30}',1114,-1);
 * select set_in_out_test('{2022-01-01T11:21:21.7410 +05:3a}',1114,-1);
 * select set_in_out_test('{2022-01-01T11:21:21.7410 +0x:31}',1114,-1);
 * select set_in_out_test('{{2022-01-01T11:21:21.7410 +01:31}}',1114,-1);
 * select set_in_out_test('{NULL,1,2,NULL,NULL}',23,-1);
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
/* #include "utils/array.h" */
#include "utils/numeric.h"
#include "utils/timestamp.h"
#include "funcapi.h"
#include "utils/lsyscache.h"
#include "utils/fmgrprotos.h"
#include "common/hashfn.h"
#include "utils/uuid.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(set_in_out_test);

/* Integer division rounding up; used to size bitmaps in bytes. */
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))

/* Stride used when probing for a slot after a hash collision. */
#define HASHSET_STEP 13

#define PG_RETURN_SETTYPE_P(x) PG_RETURN_POINTER(x)

/*
 * In-memory representation of a hash set of fixed-width elements.
 *
 * A set is a varlena object, so the first int32 holds the total object size
 * in bytes.  Be sure to use VARSIZE() and SET_VARSIZE() to access it!
 *
 * The layout implied by the macros below is:
 *
 *	SetType header
 *	occupancy bitmap	CEIL_DIV(capacity, 8) bytes, one bit per slot
 *	null bitmap			only when dataoffset != 0
 *	(alignment padding)
 *	slot data			starts at SET_DATA_OFFSET()
 */
typedef struct SetType
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int32		capacity;		/* number of slots in the hash table */
	int32		dataoffset;		/* offset to data, or 0 if no null bitmap */
	int32		nelements;		/* number of items added to the hashset */
	Oid			elemtype;		/* element type OID */
} SetType;

#define SET_SIZE(a)				VARSIZE(a)
#define SET_ITEM(a)				((a)->nelements)
#define SET_CAPACITY(a)			((a)->capacity)
#define SET_HASNULL(a)			((a)->dataoffset != 0)
#define SET_ELEMTYPE(a)			((a)->elemtype)

/* Bytes preceding the slot data when there is no null bitmap. */
#define SET_OVERHEAD_NONULLS(capacity) \
		MAXALIGN(sizeof(SetType) + CEIL_DIV(capacity, 8))

/* Bytes preceding the slot data when a null bitmap of nelements bits exists. */
#define SET_OVERHEAD_WITHNULLS(capacity,nelements) \
		MAXALIGN(sizeof(SetType) + CEIL_DIV(capacity, 8) + \
				 ((nelements) + 7) / 8)

/* Start of the occupancy bitmap, immediately after the header. */
#define SET_BITMAP(a) \
		(((char *) (a)) + sizeof(SetType))

#define SET_DATA_OFFSET(a) \
		(SET_HASNULL(a) ? (a)->dataoffset : SET_OVERHEAD_NONULLS(SET_CAPACITY(a)))

/*
 * Start of the null bitmap, or NULL if the set has none.
 *
 * The null bitmap follows the occupancy bitmap, whose size depends on the
 * capacity.  (Deriving the offset from nelements instead would make it
 * coincide with SET_BITMAP() while the set is still empty, and would move it
 * every time an element is added.)
 */
#define SET_NULLBITMAP(a) \
		(SET_HASNULL(a) ? \
		 (bits8 *) (((char *) (a)) + sizeof(SetType) + \
					CEIL_DIV(SET_CAPACITY(a), 8)) \
		 : (bits8 *) NULL)

/*
 * Returns a pointer to the actual slot data.
 */
#define SET_DATA_PTR(a) \
		(((char *) (a)) + SET_DATA_OFFSET(a))

/*
 * Element-type I/O information cached in fn_extra between calls.
 */
typedef struct SetMetaState
{
	Oid			element_type;	/* type the cached fields describe */
	int16		typlen;
	bool		typbyval;
	char		typalign;
	char		typdelim;
	Oid			typioparam;
	Oid			typiofunc;
	FmgrInfo	proc;			/* lookup info for typiofunc */
} SetMetaState;

static int	SetCount(const char *str, int *dim, char typdelim,
					 Node *escontext);
static bool set_get_isnull(const bits8 *nullbitmap, int offset);

SetType    *set_add_int32(SetType *set, Datum dvalue);
SetType    *set_add_int64(SetType *set, Datum dvalue);
SetType    *set_add_interval(SetType *set, Datum dvalue);
SetType    *set_add_uuid(SetType *set, Datum dvalue);
SetType    *set_add_timestamp(SetType *set, Datum dvalue);
SetType    *construct_empty_set(Oid elmtype);
SetType    *set_init(int capacity, Oid elemtype, bool hasnull);
char	   *SetOut(SetType *retval);
void		CopySetNulls(SetType *set, Datum *values, bool *nulls, int nitems,
						 int typlen, bool typbyval, char typalign,
						 bool freedata);

/*
 * Check whether a specific array element is NULL
 *
 * nullbitmap: pointer to array's null bitmap (NULL if none)
* offset: 0-based linear element number of array element */ static bool set_get_isnull(const bits8 *nullbitmap, int offset) { if (nullbitmap == NULL) return false; /* assume not null */ if (nullbitmap[offset / 8] & (1 << (offset % 8))) return false; /* not null */ return true; } /* * array_isspace() --- a non-locale-dependent isspace() * * We used to use isspace() for parsing array values, but that has * undesirable results: an array value might be silently interpreted * differently depending on the locale setting. Now we just hard-wire * the traditional ASCII definition of isspace(). */ static bool set_isspace(char ch) { if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f') return true; return false; } /* * ReadSetStr : * parses the array string pointed to by "arrayStr" and converts the values * to internal format. Unspecified elements are initialized to nulls. * The array dimensions must already have been determined. * * Inputs: * arrayStr: the string to parse. * CAUTION: the contents of "arrayStr" will be modified! * origStr: the unmodified input string, used only in error messages. * nitems: total number of array elements, as already determined. * ndim: number of array dimensions * dim[]: array axis lengths * inputproc: type-specific input procedure for element datatype. * typioparam, typmod: auxiliary values to pass to inputproc. * typdelim: the value delimiter (type-specific). * typlen, typbyval, typalign: storage parameters of element datatype. * * Outputs: * values[]: filled with converted data values. * nulls[]: filled with is-null markers. * *hasnulls: set true iff there are any null elements. * *nbytes: set to total size of data area needed (including alignment * padding but not including array header overhead). * *escontext: if this points to an ErrorSaveContext, details of * any error are reported there. * * Result: * true for success, false for failure (if escontext is provided). 
* * Note that values[] and nulls[] are allocated by the caller, and must have * nitems elements. */ typedef enum { SET_NO_LEVEL, SET_LEVEL_STARTED, SET_ELEM_STARTED, SET_ELEM_COMPLETED, SET_QUOTED_ELEM_STARTED, SET_QUOTED_ELEM_COMPLETED, SET_ELEM_DELIMITED, SET_LEVEL_COMPLETED, SET_LEVEL_DELIMITED } SetParseState; /* * SetCount * Determines the dimensions for an array string. * * Returns number of dimensions as function result. The axis lengths are * returned in dim[], which must be of size MAXDIM. * * If we detect an error, fill *escontext with error details and return -1 * (unless escontext isn't provided, in which case errors will be thrown). */ #undef MAXDIM #define MAXDIM 1 static int SetCount(const char *str, int *dim, char typdelim, Node *escontext) { int nest_level = 0, i; int ndim = 1, temp[MAXDIM], nelems[MAXDIM], nelems_last[MAXDIM]; bool in_quotes = false; bool eoArray = false; bool empty_array = true; const char *ptr; SetParseState parse_state = SET_NO_LEVEL; for (i = 0; i < MAXDIM; ++i) { temp[i] = dim[i] = nelems_last[i] = 0; nelems[i] = 1; } ptr = str; while (!eoArray) { bool itemdone = false; while (!itemdone) { if (parse_state == SET_ELEM_STARTED || parse_state == SET_QUOTED_ELEM_STARTED) empty_array = false; switch (*ptr) { case '\0': /* Signal a premature end of the string */ ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected end of input."))); case '\\': /* * An escape must be after a level start, after an element * start, or after an element delimiter. In any case we * now must be past an element start. 
*/ if (parse_state != SET_LEVEL_STARTED && parse_state != SET_ELEM_STARTED && parse_state != SET_QUOTED_ELEM_STARTED && parse_state != SET_ELEM_DELIMITED) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '\\'))); if (parse_state != SET_QUOTED_ELEM_STARTED) parse_state = SET_ELEM_STARTED; /* skip the escaped character */ if (*(ptr + 1)) ptr++; else ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected end of input."))); break; case '"': /* * A quote must be after a level start, after a quoted * element start, or after an element delimiter. In any * case we now must be past an element start. */ if (parse_state != SET_LEVEL_STARTED && parse_state != SET_QUOTED_ELEM_STARTED && parse_state != SET_ELEM_DELIMITED) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); in_quotes = !in_quotes; if (in_quotes) parse_state = SET_QUOTED_ELEM_STARTED; else parse_state = SET_QUOTED_ELEM_COMPLETED; break; case '{': if (!in_quotes) { /* * A left brace can occur if no nesting has occurred * yet, after a level start, or after a level * delimiter. 
*/ if (parse_state != SET_NO_LEVEL && parse_state != SET_LEVEL_STARTED && parse_state != SET_LEVEL_DELIMITED) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '{'))); parse_state = SET_LEVEL_STARTED; if (nest_level >= MAXDIM) ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", nest_level + 1, MAXDIM))); temp[nest_level] = 0; nest_level++; if (ndim < nest_level) ndim = nest_level; } break; case '}': if (!in_quotes) { /* * A right brace can occur after an element start, an * element completion, a quoted element completion, or * a level completion. */ if (parse_state != SET_ELEM_STARTED && parse_state != SET_ELEM_COMPLETED && parse_state != SET_QUOTED_ELEM_COMPLETED && parse_state != SET_LEVEL_COMPLETED && !(nest_level == 1 && parse_state == SET_LEVEL_STARTED)) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '}'))); parse_state = SET_LEVEL_COMPLETED; if (nest_level == 0) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unmatched \"%c\" character.", '}'))); nest_level--; if (nelems_last[nest_level] != 0 && nelems[nest_level] != nelems_last[nest_level]) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Multidimensional arrays must have " "sub-arrays with matching " "dimensions."))); nelems_last[nest_level] = nelems[nest_level]; nelems[nest_level] = 1; if (nest_level == 0) eoArray = itemdone = true; else { /* * We don't set itemdone here; see comments in * ReadSetStr */ temp[nest_level - 1]++; } } break; default: if (!in_quotes) { if (*ptr == typdelim) { /* * Delimiters can occur after an element start, an * 
element completion, a quoted element * completion, or a level completion. */ if (parse_state != SET_ELEM_STARTED && parse_state != SET_ELEM_COMPLETED && parse_state != SET_QUOTED_ELEM_COMPLETED && parse_state != SET_LEVEL_COMPLETED) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", typdelim))); if (parse_state == SET_LEVEL_COMPLETED) parse_state = SET_LEVEL_DELIMITED; else parse_state = SET_ELEM_DELIMITED; itemdone = true; nelems[nest_level - 1]++; } else if (!set_isspace(*ptr)) { /* * Other non-space characters must be after a * level start, after an element start, or after * an element delimiter. In any case we now must * be past an element start. */ if (parse_state != SET_LEVEL_STARTED && parse_state != SET_ELEM_STARTED && parse_state != SET_ELEM_DELIMITED) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); parse_state = SET_ELEM_STARTED; } } break; } if (!itemdone) ptr++; } temp[ndim - 1]++; ptr++; } /* only whitespace is allowed after the closing brace */ while (*ptr) { if (!set_isspace(*ptr++)) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Junk after closing right brace."))); } /* special case for an empty array */ if (empty_array) return 0; for (i = 0; i < ndim; ++i) dim[i] = temp[i]; return ndim; } bool Array_nulls = true; static bool ReadSetStr(char *arrayStr, const char *origStr, int nitems, int ndim, int *dim, FmgrInfo *inputproc, Oid typioparam, int32 typmod, char typdelim, int typlen, bool typbyval, char typalign, Datum *values, bool *nulls, bool *hasnulls, int32 *nbytes, Node *escontext) { int i; char *srcptr; bool in_quotes = false; bool eoArray = false; bool hasnull; int32 totbytes; int indx = 0; /* Initialize is-null markers to true */ memset(nulls, 
true, nitems * sizeof(bool)); /* * We have to remove " and \ characters to create a clean item value to * pass to the datatype input routine. We overwrite each item value * in-place within arrayStr to do this. srcptr is the current scan point, * and dstptr is where we are copying to. * * We also want to suppress leading and trailing unquoted whitespace. We * use the leadingspace flag to suppress leading space. Trailing space is * tracked by using dstendptr to point to the last significant output * character. * * The error checking in this routine is mostly pro-forma, since we expect * that SetCount() already validated the string. So we don't bother * with errdetail messages. */ srcptr = arrayStr; while (!eoArray) { bool itemdone = false; bool leadingspace = true; bool hasquoting = false; char *itemstart; char *dstptr; char *dstendptr; itemstart = dstptr = dstendptr = srcptr; while (!itemdone) { switch(*srcptr) { case '\0': /* Signal a premature end of the string */ /* Signal a premature end of the string */ ereturn(escontext, false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); break; case '\\': /* Skip backslash, copy next character as-is. */ srcptr++; if (*srcptr == '\0') ereturn(escontext,false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); /* Treat the escaped character as non-whitespace*/ leadingspace = false; dstendptr = dstptr; hasquoting = false; /* can't be a NULL marker */ break; case '"': in_quotes = !in_quotes; if (in_quotes) leadingspace = false; else { /* * Advance dstendptr when we exit in_quotes; this * saves having to do it in all the other in_quotes * cases. 
*/ dstendptr = dstptr; } hasquoting = true; /* can't be a NULL marker */ srcptr++; break; case '{': if (!in_quotes) { srcptr++; } else *dstptr++ = *srcptr++; break; case '}': if (!in_quotes) { eoArray = itemdone = true; srcptr++; } else *dstptr++ = *srcptr++; break; default : if(in_quotes) *dstptr++ = *srcptr++; else if (*srcptr == typdelim) { itemdone = true; srcptr ++; } else if (set_isspace(*srcptr)) { /* * If leading space, drop it immediately. Else, copy * but don't advance dstendptr. */ if(leadingspace) srcptr++; else *dstptr++ = *srcptr++; } else { *dstptr++ = *srcptr++; leadingspace = false; dstendptr = dstptr; } break; } } Assert(dstptr < srcptr); *dstendptr = '\0'; if (Array_nulls && !hasquoting && pg_strcasecmp(itemstart, "NULL") == 0) { /* it's a NULL item */ if (!InputFunctionCallSafe(inputproc, NULL, typioparam, typmod, escontext, &values[indx])) return false; nulls[indx] = true; indx++; } else { elog(INFO,"line[%04d] indx:%d itemstart:%s, typioparam:%d,typmod:%d",__LINE__,indx,itemstart, typioparam,typmod); if (!InputFunctionCallSafe(inputproc, itemstart, typioparam, typmod, escontext, &values[indx])) return false; nulls[indx] = false; indx++; } } /* * Check for nulls, compute total data space needed */ hasnull = false; totbytes = 0; elog(INFO,"line[%04d] nitems=%d, dim=%d",__LINE__,nitems,*dim); for (i = 0; i < nitems; i++) { if(nulls[i]) hasnull = true; else { /* let's just make sure data is not toasted */ if (typlen == -1) values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i])); totbytes = att_addlength_datum(totbytes, typlen, values[i]); totbytes = att_align_nominal(totbytes, typalign); elog(INFO,"line[%04d] total bytes: %d",__LINE__,totbytes); /* check for overflow of total request */ if (!AllocSizeIsValid(totbytes)) ereturn(escontext, false, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceed the maximum allowed (%d)", (int) MaxAllocSize))); } } *hasnulls = hasnull; *nbytes = totbytes; elog(INFO,"LINE[%04d] total bytes: %d, 
hasnulls:%d",__LINE__, totbytes, *hasnulls); return true; } /* * Copy datum to *dest and return total space used (including align padding) * * Caller must have handled case of NULL element */ //imitate array_in. Datum set_in_out_test(PG_FUNCTION_ARGS) { char *string = PG_GETARG_CSTRING(0); /* external form */ Oid element_type = PG_GETARG_OID(1); /* type of an array * element */ int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */ Node *escontext = fcinfo->context; int typlen; bool typbyval; char typalign; char typdelim = ','; Oid typioparam; char *string_save, *p; int nitems; Datum *dataPtr; bool *nullsPtr; bool hasnulls; int32 nbytes; int32 dataoffset; SetType *retval; int dim; int ndim; SetMetaState *my_extra; int capacity =64; bits8 *nullmap; /* * We arrange to look up info about element type, including its input * conversion proc, only once per series of calls, assuming the element * type doesn't change underneath us. */ my_extra = (SetMetaState *) fcinfo->flinfo->fn_extra; if (my_extra == NULL) { elog(INFO,"line[%04d] my_extra == NULL looped",__LINE__); fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt ,sizeof(SetMetaState)); my_extra = (SetMetaState *) fcinfo->flinfo->fn_extra; my_extra->element_type = ~element_type; } if (my_extra->element_type != element_type) { elog(INFO,"line[%04d] my_extra->element_type != element_type looped",__LINE__); /* * Get info about element type, including its input conversion proc */ get_type_io_data(element_type,IOFunc_input, &my_extra->typlen,&my_extra->typbyval, &my_extra->typalign, &my_extra->typdelim, &my_extra->typioparam,&my_extra->typiofunc); fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, fcinfo->flinfo->fn_mcxt); my_extra->element_type = element_type; } typlen = my_extra->typlen; typbyval = my_extra->typbyval; typalign = my_extra->typalign; typdelim = my_extra->typdelim; typioparam = my_extra->typioparam; elog(INFO,"line[%04d]: typlen: %d, typbyval:%d typalign %c typdelim %c 
typioparam %d" ,__LINE__,typlen,typbyval, typalign, typdelim, typioparam); ndim = SetCount(string,&dim,typdelim, escontext); nitems = dim; elog(INFO,"line[%04d]: ndim:%d dim:%d nitems:%d",__LINE__,ndim,dim, nitems); /* Empty set? */ if (nitems == 0) { // PG_RETURN_SETTYPE_P(construct_empty_set(element_type)); elog(INFO,"line %04d empty set. now leaving",__LINE__); PG_RETURN_BOOL(true); } dataPtr = (Datum *) palloc(nitems * sizeof(Datum)); nullsPtr = (bool *) palloc(nitems * sizeof(bool)); /* Make a modifiable copy of the input */ string_save = pstrdup(string); /* * If the input string starts with dimension info, read and use that. * Otherwise, we require the input to be in curly-brace style, and we * prescan the input to determine dimensions. * * Dimension info takes the form of one or more [n] or [m:n] items. The * outer loop iterates once per dimension item. */ p = string_save; elog(INFO,"line %d, function oid : %d",__LINE__,*(&my_extra->proc.fn_oid)); Assert(nitems == dim); if (!ReadSetStr(p, string, nitems, ndim, &dim, &my_extra->proc, typioparam, typmod, typdelim, typlen, typbyval, typalign, dataPtr, nullsPtr, &hasnulls, &nbytes, escontext)) { elog(INFO,"LINE[%04d] ReadSetStr FALSE, nbytes %d, hasnulls %d dim:%d",__LINE__,nbytes,hasnulls,dim); PG_RETURN_BOOL(false); } elog(INFO,"LINE[%04d] ReadSetStr OK, nbytes %d, hasnulls %d dim:%d",__LINE__,nbytes,hasnulls,dim); Assert(ndim == 1); // or deal with ndim = 0; // compute init size needed initial. 
if (hasnulls) { dataoffset = SET_OVERHEAD_WITHNULLS(capacity, nitems); if (capacity < nitems) capacity = nitems + 7 - (nitems + 7) % 8; nbytes = capacity * typlen; nbytes += dataoffset; elog(INFO,"LINE[%04d] capacity:%d nitems:%d allocated bytes: %d SET_OVERHEAD_WITHNULLS :%ld" ,__LINE__,capacity, nitems,nbytes,SET_OVERHEAD_WITHNULLS(capacity, nitems)); } else { //init size no nulls dataoffset = 0; if (capacity < nitems) capacity = nitems + 7 - (nitems + 7) % 8; nbytes = capacity * typlen; nbytes += SET_OVERHEAD_NONULLS(capacity); elog(INFO,"LINE[%04d] SET_OVERHEAD_NONULLS: %ld allocated bytes: %d" ,__LINE__,SET_OVERHEAD_NONULLS(capacity), nbytes); } //allocate bytes and set some SetType property. retval = (SetType *) palloc0(nbytes); SET_VARSIZE(retval, nbytes); retval->capacity = capacity; retval->dataoffset = dataoffset; retval->nelements = 0; retval->elemtype = element_type; elog(INFO,"LINE[%04d] SetType meta info: retval->nelements:%d, retval->capacity:%d,retval->elemtype:%d,retval->dataoffset:%d" ,__LINE__,retval->nelements, retval->capacity,retval->elemtype,retval->dataoffset); //copy nullsPtr value to retval. 
CopySetNulls(retval,dataPtr,nullsPtr,nitems,typlen,typbyval,typalign,true); switch(element_type) { case 23: // retval = set_add_int32(retval, dataPtr,&nitems); for(int i = 0; i < nitems; i++) { set_add_int32(retval, dataPtr[i]); } break; case 20: for(int i = 0; i < nitems; i++) { set_add_int64(retval, dataPtr[i]); } break; case 1114: for(int i = 0; i < nitems; i++) { set_add_timestamp(retval, dataPtr[i]); } break; case 1186: for(int i = 0; i < nitems; i++) { set_add_interval(retval, dataPtr[i]); } elog(INFO,"now retval should have %d interval element oid:%d", retval->nelements,retval->elemtype); break; case 2950: for(int i = 0; i < nitems; i++) { set_add_uuid(retval, dataPtr[i]); } elog(INFO,"now retval should have %d interval element oid:%d", retval->nelements,retval->elemtype); break; default: elog(INFO,"line[%04d] now only support x type",__LINE__); } elog(INFO,"LINE[%04d] SetType meta info: retval->nelements:%d, retval->capacity:%d,retval->elemtype:%d,retval->dataoffset:%d" ,__LINE__,retval->nelements, retval->capacity,retval->elemtype,retval->dataoffset); //set out test. char *generic_setout; generic_setout = SetOut(retval); elog(INFO,"line[%04d]: set output:%s",__LINE__,generic_setout); pfree(dataPtr); pfree(nullsPtr); // PG_RETURN_ARRAYTYPE_P(retval); PG_RETURN_BOOL(true); } /* imitate CopyArrayEls. only copy nulls * Copy null into an array object from a temporary array of Datums. * * array: set object (with header fields already filled in) * nulls: array of is-null flags (can be NULL if no nulls) * nitems: number of Datums to be copied * typbyval, typlen, typalign: info about element datatype * freedata: if true and element type is pass-by-ref, pfree data values * referenced by Datums after copying them. * * If the input data is of varlena type, the caller must have ensured that * the values are not toasted. (Doing it here doesn't work since the * caller has already allocated space for the array...) 
*/ void CopySetNulls(SetType *set, Datum *values, bool *nulls, int nitems, int typlen, bool typbyval, char typalign, bool freedata) { char *p = SET_DATA_PTR(set); bits8 *bitmap = SET_NULLBITMAP(set); int bitval = 0; int bitmask = 1; int i; if (typbyval) freedata = false; for (i = 0; i < nitems; i++) { if (nulls && nulls[i]) { if (!bitmap) /* shouldn't happen */ elog(ERROR, "null array element where not supported"); /* bitmap bit stays 0 */ } else { bitval |= bitmask; } if (bitmap) { bitmask <<= 1; if (bitmask == 0x100) { *bitmap++ = bitval; bitval = 0; bitmask = 1; } } } if (bitmap && bitmask != 1) *bitmap = bitval; } SetType * set_add_int32(SetType *set, Datum dvalue) { int byte; int bit; uint32 hash; uint32 position; char *bitmap; int32 *values; // set actual data begins. bitmap = SET_BITMAP(set); values = (int32 *) SET_DATA_PTR(set); // cannot cat to Datum (that's 8 byte). not sure this part. int32 value = DatumGetInt32(dvalue); hash = hash_bytes_uint32((uint32) value); position = hash % set->capacity; while (true) { byte = (position / 8); bit = (position % 8); /* The item is already used - maybe it's the same value? */ if (bitmap[byte] & (0x01 << bit)) { /* Same value, we're done */ if (values[position] == value) break; position = (position + HASHSET_STEP) % set->capacity; continue; } /* Found an empty spot, before hitting the value first */ bitmap[byte] |= (0x01 << bit); values[position] = value; set->nelements++; break; } return set; } SetType * set_add_int64(SetType *set, Datum dvalue) { int byte; int bit; uint32 hash; uint32 position; char *bitmap; Datum *datums; // set actual data begins. Datum tempd; hash = DatumGetUInt32(DirectFunctionCall1(hashint8,dvalue)); position = hash % set->capacity; bitmap = SET_BITMAP(set); datums = (Datum *)SET_DATA_PTR(set); while (true) { byte = (position / 8); bit = (position % 8); /* The item is already used - maybe it's the same value? 
*/ if (bitmap[byte] & (0x01 << bit)) { /* Same value, we're done */ if (DatumGetBool(DirectFunctionCall2(int8eq,datums[position],dvalue))) break; position = (position + HASHSET_STEP) % set->capacity; continue; } /* Found an empty spot, before hitting the value first */ bitmap[byte] |= (0x01 << bit); datums[position] = dvalue; set->nelements++; break; } return set; } SetType * set_add_timestamp(SetType *set, Datum dvalue) { int byte; int bit; uint32 hash; uint32 position; char *bitmap; Datum *datums; // set actual data begins. Datum tempd; tempd = DirectFunctionCall1(timestamp_hash,dvalue); hash = DatumGetUInt32(tempd); position = hash % set->capacity; bitmap = SET_BITMAP(set); datums = (Datum *)SET_DATA_PTR(set); while (true) { byte = (position / 8); bit = (position % 8); /* The item is already used - maybe it's the same value? */ if (bitmap[byte] & (0x01 << bit)) { /* Same value, we're done */ if (DatumGetBool( DirectFunctionCall2(timestamp_eq,datums[position],dvalue))) break; position = (position + HASHSET_STEP) % set->capacity; continue; } /* Found an empty spot, before hitting the value first */ bitmap[byte] |= (0x01 << bit); datums[position] = dvalue; set->nelements++; break; } return set; } SetType *set_add_uuid(SetType *set, Datum dvalue) { int byte; int bit; uint32 hash; uint32 position; char *bitmap; Datum *datums; // set actual data begins. Datum tempd; tempd = DirectFunctionCall1(uuid_hash,dvalue); hash = DatumGetUInt32(tempd); position = hash % set->capacity; bitmap = SET_BITMAP(set); datums = (Datum *)SET_DATA_PTR(set); while (true) { byte = (position / 8); bit = (position % 8); /* The item is already used - maybe it's the same value? 
*/ if (bitmap[byte] & (0x01 << bit)) { /* Same value, we're done */ if (DatumGetBool(DirectFunctionCall2(uuid_eq,datums[position],dvalue))) break; position = (position + HASHSET_STEP) % set->capacity; continue; } /* Found an empty spot, before hitting the value first */ bitmap[byte] |= (0x01 << bit); datums[position] = dvalue; set->nelements++; break; } return set; } SetType * set_add_interval(SetType *set, Datum dvalue) { int byte; int bit; uint32 hash; uint32 position; char *bitmap; Datum *datums; // set actual data begins. Datum tempd; tempd = DirectFunctionCall1(interval_hash,dvalue); hash = DatumGetUInt32(tempd); position = hash % set->capacity; bitmap = SET_BITMAP(set); datums = (Datum *)SET_DATA_PTR(set); while (true) { byte = (position / 8); bit = (position % 8); /* The item is already used - maybe it's the same value? */ if (bitmap[byte] & (0x01 << bit)) { /* Same value, we're done */ if (DatumGetBool(DirectFunctionCall2(interval_eq,datums[position],dvalue))) break; position = (position + HASHSET_STEP) % set->capacity; continue; } /* Found an empty spot, before hitting the value first */ bitmap[byte] |= (0x01 << bit); datums[position] = dvalue; set->nelements++; break; } return set; } /* * construct_empty_set. --- make a zero-dimensional set of given type */ SetType * construct_empty_set(Oid elmtype) { SetType *result; result = (SetType *) palloc0(sizeof(SetType)); SET_VARSIZE(result, sizeof(SetType)); result->capacity = 0; result->dataoffset = 0; result->elemtype = elmtype; return result; } char * SetOut(SetType *retval) { StringInfoData str; /* Initialize the StringInfo buffer */ initStringInfo(&str); /* Append the opening brace for the output hashset string */ appendStringInfoChar(&str, '{'); char *bitmap = SET_BITMAP(retval); //int out. 
if ( retval->elemtype == 23) { int32 *output = (int32 *) SET_DATA_PTR(retval); /* Loop through the elements and append them to the string */ for(int i = 0; i < retval->capacity; i++) { int byte = i / 8; int bit = i % 8; /* Check if the bit in the bitmap is set */ if (bitmap[byte] & (0x01 << bit)) { /* Append the value */ if (str.len > 1) appendStringInfoChar(&str, ','); appendStringInfo(&str, "%d", output[i]); } } } //int64 out if (retval->elemtype == 20) { Datum *datums = (Datum *) SET_DATA_PTR(retval); /* Loop through the elements and append them to the string */ for(int i = 0; i < retval->capacity; i++) { int byte = i / 8; int bit = i % 8; /* Check if the bit in the bitmap is set */ if (bitmap[byte] & (0x01 << bit)) { /* Append the value */ if (str.len > 1) appendStringInfoChar(&str, ','); appendStringInfo(&str, "%ld", DatumGetInt64(datums[i])); } } } //timestamp out if (retval->elemtype == 1114) { Datum *datums = (Datum *) SET_DATA_PTR(retval); for (int i = 0; i < retval->capacity; i++) { int byte = i / 8; int bit = i % 8; /* Check if the bit in the bitmap is set */ if (bitmap[byte] & (0x01 << bit)) { /* Append the value */ if (str.len > 1) appendStringInfoChar(&str, ','); appendStringInfo(&str, "%s",DatumGetCString(DirectFunctionCall1(timestamp_out,datums[i]))); } } } // interval out if ( retval->elemtype == 1186) { Datum *datums = (Datum *) SET_DATA_PTR(retval); for (int i = 0; i < retval->capacity; i++) { int byte = i / 8; int bit = i % 8; /* Check if the bit in the bitmap is set */ if (bitmap[byte] & (0x01 << bit)) { /* Append the value */ if (str.len > 1) appendStringInfoChar(&str, ','); appendStringInfo(&str, "%s",DatumGetCString(DirectFunctionCall1(interval_out,datums[i]))); } } } //uuid out if ( retval->elemtype == 2950) { Datum *datums = (Datum *) SET_DATA_PTR(retval); for (int i = 0; i < retval->capacity; i++) { int byte = i / 8; int bit = i % 8; /* Check if the bit in the bitmap is set */ if (bitmap[byte] & (0x01 << bit)) { /* Append the value */ if 
(str.len > 1) appendStringInfoChar(&str, ','); appendStringInfo(&str, "%s",DatumGetCString(DirectFunctionCall1(uuid_out,datums[i]))); } } } /* Append the closing brace for the output hashset string */ appendStringInfoChar(&str, '}'); return str.data; }