> Or maybe I just don't understand the proposal. Perhaps it'd be best if
> jian wrote a patch illustrating the idea, and showing how it performs
> compared to the current approach.

Currently Joel's idea is an int4hashset, based on the code Tomas first wrote.
It looks like a non-nested collection of unique int4 values. The external text
format looks like {int4,int4,int4}.
The internal structure looks like (header + capacity slots * int4).
Within the capacity slots, some slots are empty and the others hold unique values.

The textual form of an int4hashset looks like a one-dimensional array,
so I copied/imitated the code in src/backend/utils/adt/arrayfuncs.c and wrote
slightly more generic hashset input and output functions.

See the attached C file.
It works fine for non-null input and output for {int4hashset, int8hashset,
timestamphashset, intervalhashset, uuidhashset}.
/*
gcc -I/home/jian/postgres/2023_05_25_beta5421/include/server -fPIC -c /home/jian/Desktop/regress_pgsql/set.c
gcc -shared  -o /home/jian/Desktop/regress_pgsql/set.so /home/jian/Desktop/regress_pgsql/set.o

CREATE OR REPLACE FUNCTION set_in_out_test(cstring,int, int) RETURNS BOOL SET search_path from current
        AS '/home/jian/Desktop/regress_pgsql/set', 'set_in_out_test'
        LANGUAGE C IMMUTABLE;

select  set_in_out_test('{111,-0,+0,-2147483648,2147483647}',23,-1);
select  set_in_out_test('{}',23,-1);
select  set_in_out_test('{-1111111,-9223372036854775808,9223372036854775807,-0,+0}',20,-1);
select  set_in_out_test('{2022-01-01T11:21:21.741077 +05:30, 2022-01-01T11:21:21.741076 +05:30,-infinity,+infinity}',1114,-1);
select  set_in_out_test('{"1hour", " 0:00","-0:00","2000:10:01.23456789"}',1186,-1); 
select  set_in_out_test('{03aadb61-3e30-4112-a46a-8cd72f29876b,2c1e4b2c-b8f4-470a-843f-dd094e4743a6
			,ef1a3202-a84f-4321-ab3c-45e04bf4c42d,2c1e4b2c-b8f4-470a-843f-dd094e4743a6}',2950,-1);

-----------parse fail cases.
select  set_in_out_test('{{-1111111,-9223372036854775808,9223372036854775807,-0,+0}}',20,-1);
select  set_in_out_test('{-1111111,-9223372036854775808,9223372036854775807,-0,+0.1}',20,-1);
select  set_in_out_test('{-1111111,-9223372036854775808,9223372036854775808,-0,+0.1}',20,-1);
select  set_in_out_test('{2022-01-01T11:21:21.7410as +05:30}',1114,-1);
select  set_in_out_test('{2022-01-01T11:21:21.7410 +05:3a}',1114,-1);
select  set_in_out_test('{2022-01-01T11:21:21.7410 +0x:31}',1114,-1);
select  set_in_out_test('{{2022-01-01T11:21:21.7410 +01:31}}',1114,-1);

select  set_in_out_test('{NULL,1,2,NULL,NULL}',23,-1);

*/
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
// #include "utils/array.h"
#include "utils/numeric.h"
#include "utils/timestamp.h"
#include "funcapi.h"
#include "utils/lsyscache.h"
#include "utils/fmgrprotos.h"
#include "common/hashfn.h"
#include "utils/uuid.h"
/* Marks this shared object as a loadable PostgreSQL module. */
PG_MODULE_MAGIC;

/* SQL-callable entry point of this file; see set_in_out_test() below. */
PG_FUNCTION_INFO_V1(set_in_out_test);

/* Integer division of a by b, rounded up (intended for non-negative a, positive b). */
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))

/* Slot stride used by the set_add_* functions when a probed slot is occupied. */
#define HASHSET_STEP 13

/* Return a SetType pointer from a V1 function; thin alias of PG_RETURN_POINTER. */
#define PG_RETURN_SETTYPE_P(x)	  PG_RETURN_POINTER(x)

/*
 * SetType: in-memory representation of a hash set of fixed-size elements.
 *
 * Sets are varlena objects, so must meet the varlena convention that
 * the first int32 of the object contains the total object size in bytes.
 * Be sure to use VARSIZE() and SET_VARSIZE() to access it, though!
 *
 * As used by the accessor macros in this file, the header is followed by:
 *	- an occupancy bitmap with one bit per slot (see SET_BITMAP),
 *	- optionally a null bitmap (see SET_NULLBITMAP),
 *	- the slot array itself, starting at SET_DATA_OFFSET().
 */
typedef struct SetType
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int32       capacity;		/* number of slots in the hash table */
	int32		dataoffset;		/* offset to data, or 0 if no null bitmap */
	int32		nelements;		/* number of items added to the hashset */
	Oid			elemtype;		/* element type OID */
} SetType;

/*
 * Accessor macros for SetType.
 *
 * Layout assumed by these macros (all offsets from the start of the object):
 *
 *	sizeof(SetType)								occupancy bitmap,
 *												CEIL_DIV(capacity, 8) bytes
 *	sizeof(SetType) + CEIL_DIV(capacity, 8)		null bitmap (only present when
 *												dataoffset != 0)
 *	SET_DATA_OFFSET(a)							slot array
 */
#define SET_SIZE(a)				VARSIZE(a)
#define SET_ITEM(a)				((a)->nelements)
#define SET_CAPACITY(a)         ((a)->capacity)
#define SET_HASNULL(a)			((a)->dataoffset != 0)
#define SET_ELEMTYPE(a)			((a)->elemtype)

/* Header + occupancy bitmap, rounded up to MAXALIGN. */
#define SET_OVERHEAD_NONULLS(capacity) \
		MAXALIGN(sizeof(SetType) + CEIL_DIV(capacity, 8))
/* Header + occupancy bitmap + null bitmap of "nelements" bits, MAXALIGNed. */
#define SET_OVERHEAD_WITHNULLS(capacity,nelements) \
		MAXALIGN(sizeof(SetType) + CEIL_DIV(capacity, 8) + \
				 ((nelements) + 7) / 8)
/* Start of the occupancy bitmap (one bit per slot). */
#define SET_BITMAP(a) \
        (((char *) (a)) + sizeof(SetType))
#define SET_DATA_OFFSET(a) \
		(SET_HASNULL(a) ? (a)->dataoffset : SET_OVERHEAD_NONULLS(SET_CAPACITY(a)))
/*
 * Start of the null bitmap, or NULL if the set has none.
 *
 * The null bitmap sits immediately after the occupancy bitmap, which is
 * CEIL_DIV(capacity, 8) bytes long; this matches the space reserved by
 * SET_OVERHEAD_WITHNULLS.  (Offsetting by the element count instead would
 * make the two bitmaps overlap.)
 */
#define SET_NULLBITMAP(a) \
		(SET_HASNULL(a) ? \
		 (bits8 *) (((char *) (a)) + sizeof(SetType) + \
					CEIL_DIV(SET_CAPACITY(a), 8)) \
		 : (bits8 *) NULL)

/*
 * Returns a pointer to the first data slot of the set.
 */
#define SET_DATA_PTR(a) \
		(((char *) (a)) + SET_DATA_OFFSET(a))

/*
 * Per-call-site cache of element type I/O information.
 *
 * set_in_out_test() keeps one of these in fn_extra and refills it with
 * get_type_io_data()/fmgr_info_cxt() whenever element_type changes.
 */
typedef struct SetMetaState
{
	Oid			element_type;	/* element type the cached fields describe */
	int16		typlen;			/* element type length */
	bool		typbyval;		/* is the element type pass-by-value? */
	char		typalign;		/* element type alignment code */
	char		typdelim;		/* delimiter between elements in text form */
	Oid			typioparam;		/* parameter passed to the I/O function */
	Oid			typiofunc;		/* OID of the looked-up I/O function */
	FmgrInfo	proc;			/* lookup result for typiofunc */
} SetMetaState;


/* Text-form pre-scan and helpers. */
static int  SetCount(const char *str, int *dim, char typdelim, Node *escontext); 
static bool set_get_isnull(const bits8 *nullbitmap, int offset);

/* Per-type insertion into the hash table; each returns its "set" argument. */
SetType *set_add_int32(SetType *set, Datum dvalue);
SetType *set_add_int64(SetType *set, Datum dvalue);
SetType *set_add_interval(SetType *set, Datum dvalue);
SetType *set_add_uuid(SetType *set, Datum dvalue);
SetType *set_add_timestamp(SetType *set, Datum dvalue);

/* Construction and output. */
SetType *construct_empty_set(Oid elmtype);
SetType *set_init(int capacity,Oid elemtype, bool hasnull);
char *	SetOut(SetType *retval);

/* Records the is-null flags of the parsed items in the set's null bitmap. */
void
CopySetNulls(SetType *set,
			 Datum *values,
			 bool *nulls,
			 int nitems,
			 int typlen,
			 bool typbyval,
			 char typalign,
			 bool freedata);

/*
 * Report whether one element is flagged NULL in a null bitmap.
 *
 * nullbitmap: the null bitmap to consult (NULL means "there is no bitmap")
 * offset: 0-based bit number of the element
 *
 * A set bit means "value present"; a cleared bit means "NULL".
 */
static bool
set_get_isnull(const bits8 *nullbitmap, int offset)
{
	/* Without a bitmap nothing can be NULL. */
	if (nullbitmap == NULL)
		return false;

	return (nullbitmap[offset / 8] & (1 << (offset % 8))) == 0;
}

/*
 * set_isspace() --- a non-locale-dependent isspace()
 *
 * The parser must not interpret a value differently depending on the locale
 * setting, so the traditional ASCII whitespace set is hard-wired here rather
 * than delegating to isspace().
 */
static bool
set_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}


/*
 * ReadSetStr (defined further below, after SetCount):
 *	 parses the set string pointed to by "arrayStr" and converts the values
 *	 to internal format.  All is-null markers start out as true.
 *	 The number of items must already have been determined (see SetCount).
 *
 * Inputs:
 *	arrayStr: the string to parse.
 *			  CAUTION: the contents of "arrayStr" will be modified!
 *	origStr: the unmodified input string, used only in error messages.
 *	nitems: total number of elements, as already determined.
 *	ndim: number of dimensions as reported by SetCount
 *	dim[]: axis lengths as reported by SetCount
 *	inputproc: type-specific input procedure for element datatype.
 *	typioparam, typmod: auxiliary values to pass to inputproc.
 *	typdelim: the value delimiter (type-specific).
 *	typlen, typbyval, typalign: storage parameters of element datatype.
 *
 * Outputs:
 *	values[]: filled with converted data values.
 *	nulls[]: filled with is-null markers.
 *	*hasnulls: set true iff there are any null elements.
 *	*nbytes: set to total size of data area needed (including alignment
 *		padding but not including header overhead).
 *	*escontext: if this points to an ErrorSaveContext, details of
 *		any error are reported there.
 *
 * Result:
 *	true for success, false for failure (if escontext is provided).
 *
 * Note that values[] and nulls[] are allocated by the caller, and must have
 * nitems elements.
 */

/*
 * States of the SetCount() pre-scan; each names what was last seen.
 */
typedef enum
{
	SET_NO_LEVEL,				/* initial state, no '{' seen yet */
	SET_LEVEL_STARTED,			/* just after an unquoted '{' */
	SET_ELEM_STARTED,			/* inside an unquoted element */
	SET_ELEM_COMPLETED,			/* tested by SetCount but never assigned there */
	SET_QUOTED_ELEM_STARTED,	/* just after an opening '"' */
	SET_QUOTED_ELEM_COMPLETED,	/* just after a closing '"' */
	SET_ELEM_DELIMITED,			/* just after a delimiter following an element */
	SET_LEVEL_COMPLETED,		/* just after an unquoted '}' */
	SET_LEVEL_DELIMITED			/* just after a delimiter following a '}' */
} SetParseState;

/*
 * SetCount
 *	 Pre-scans the text form of a set, validating its syntax and counting
 *	 its elements.
 *
 * The scan is a small state machine (see SetParseState) over braces,
 * double quotes, backslash escapes, the type delimiter and whitespace.
 * Element values themselves are not interpreted here.
 *
 * Returns the number of dimensions as function result: 0 if no element was
 * seen (for example "{}"), otherwise 1, since nesting deeper than MAXDIM is
 * rejected.  The axis lengths are returned in dim[], which must be of size
 * MAXDIM; with MAXDIM = 1 that means dim[0] receives the element count, and
 * it is left at 0 when the result is 0.
 *
 * If we detect an error, fill *escontext with error details and return -1
 * (unless escontext isn't provided, in which case errors will be thrown).
 */
/* Sets are flat: override any earlier MAXDIM so one nesting level is the limit. */
#undef  MAXDIM
#define MAXDIM  1

static	int
SetCount(const char *str, int *dim, char typdelim, Node *escontext)
{
	int			nest_level = 0,
				i;
	int			ndim = 1,
				temp[MAXDIM],
				nelems[MAXDIM],
				nelems_last[MAXDIM];
	bool		in_quotes = false;
	bool		eoArray = false;
	bool		empty_array = true;
	const char *ptr;
	SetParseState parse_state = SET_NO_LEVEL;

	/* temp[] accumulates the per-level item counts that end up in dim[] */
	for (i = 0; i < MAXDIM; ++i)
	{
		temp[i] = dim[i] = nelems_last[i] = 0;
		nelems[i] = 1;
	}

	ptr = str;
	/* Outer loop: one iteration per item; inner loop: one per character. */
	while (!eoArray)
	{
		bool		itemdone = false;

		while (!itemdone)
		{
			/* Any started element means the set is not empty. */
			if (parse_state == SET_ELEM_STARTED ||
				parse_state == SET_QUOTED_ELEM_STARTED)
				empty_array = false;

			switch (*ptr)
			{
				case '\0':
					/* Signal a premature end of the string */
					ereturn(escontext, -1,
							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
							 errmsg("malformed array literal: \"%s\"", str),
							 errdetail("Unexpected end of input.")));
				case '\\':

					/*
					 * An escape must be after a level start, after an element
					 * start, or after an element delimiter. In any case we
					 * now must be past an element start.
					 */
					if (parse_state != SET_LEVEL_STARTED &&
						parse_state != SET_ELEM_STARTED &&
						parse_state != SET_QUOTED_ELEM_STARTED &&
						parse_state != SET_ELEM_DELIMITED)
						ereturn(escontext, -1,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"", str),
								 errdetail("Unexpected \"%c\" character.",
										   '\\')));
					if (parse_state != SET_QUOTED_ELEM_STARTED)
						parse_state = SET_ELEM_STARTED;
					/* skip the escaped character */
					if (*(ptr + 1))
						ptr++;
					else
						ereturn(escontext, -1,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"", str),
								 errdetail("Unexpected end of input.")));
					break;
				case '"':

					/*
					 * A quote must be after a level start, after a quoted
					 * element start, or after an element delimiter. In any
					 * case we now must be past an element start.
					 */
					if (parse_state != SET_LEVEL_STARTED &&
						parse_state != SET_QUOTED_ELEM_STARTED &&
						parse_state != SET_ELEM_DELIMITED)
						ereturn(escontext, -1,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"", str),
								 errdetail("Unexpected array element.")));
					in_quotes = !in_quotes;
					if (in_quotes)
						parse_state = SET_QUOTED_ELEM_STARTED;
					else
						parse_state = SET_QUOTED_ELEM_COMPLETED;
					break;
				case '{':
					if (!in_quotes)
					{
						/*
						 * A left brace can occur if no nesting has occurred
						 * yet, after a level start, or after a level
						 * delimiter.
						 */
						if (parse_state != SET_NO_LEVEL &&
							parse_state != SET_LEVEL_STARTED &&
							parse_state != SET_LEVEL_DELIMITED)
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Unexpected \"%c\" character.",
											   '{')));
						parse_state = SET_LEVEL_STARTED;
						/* With MAXDIM = 1 this rejects any nested brace. */
						if (nest_level >= MAXDIM)
							ereturn(escontext, -1,
									(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
									 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
											nest_level + 1, MAXDIM)));
						temp[nest_level] = 0;
						nest_level++;
						if (ndim < nest_level)
							ndim = nest_level;
					}
					break;
				case '}':
					if (!in_quotes)
					{
						/*
						 * A right brace can occur after an element start, an
						 * element completion, a quoted element completion, or
						 * a level completion.  It is also accepted directly
						 * after the outermost level start, i.e. "{}".
						 */
						if (parse_state != SET_ELEM_STARTED &&
							parse_state != SET_ELEM_COMPLETED &&
							parse_state != SET_QUOTED_ELEM_COMPLETED &&
							parse_state != SET_LEVEL_COMPLETED &&
							!(nest_level == 1 && parse_state == SET_LEVEL_STARTED))
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Unexpected \"%c\" character.",
											   '}')));
						parse_state = SET_LEVEL_COMPLETED;
						if (nest_level == 0)
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Unmatched \"%c\" character.", '}')));
						nest_level--;

						/* Every closed level must hold as many items as the previous one. */
						if (nelems_last[nest_level] != 0 &&
							nelems[nest_level] != nelems_last[nest_level])
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Multidimensional arrays must have "
											   "sub-arrays with matching "
											   "dimensions.")));
						nelems_last[nest_level] = nelems[nest_level];
						nelems[nest_level] = 1;
						if (nest_level == 0)
							eoArray = itemdone = true;
						else
						{
							/*
							 * We don't set itemdone here.  With MAXDIM = 1
							 * nest_level never exceeds 1 before the decrement
							 * above, so this branch is not reached.
							 */
							temp[nest_level - 1]++;
						}
					}
					break;
				default:
					if (!in_quotes)
					{
						if (*ptr == typdelim)
						{
							/*
							 * Delimiters can occur after an element start, an
							 * element completion, a quoted element
							 * completion, or a level completion.
							 */
							if (parse_state != SET_ELEM_STARTED &&
								parse_state != SET_ELEM_COMPLETED &&
								parse_state != SET_QUOTED_ELEM_COMPLETED &&
								parse_state != SET_LEVEL_COMPLETED)
								ereturn(escontext, -1,
										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
										 errmsg("malformed array literal: \"%s\"", str),
										 errdetail("Unexpected \"%c\" character.",
												   typdelim)));
							if (parse_state == SET_LEVEL_COMPLETED)
								parse_state = SET_LEVEL_DELIMITED;
							else
								parse_state = SET_ELEM_DELIMITED;
							itemdone = true;
							nelems[nest_level - 1]++;
						}
						else if (!set_isspace(*ptr))
						{
							/*
							 * Other non-space characters must be after a
							 * level start, after an element start, or after
							 * an element delimiter. In any case we now must
							 * be past an element start.
							 */
							if (parse_state != SET_LEVEL_STARTED &&
								parse_state != SET_ELEM_STARTED &&
								parse_state != SET_ELEM_DELIMITED)
								ereturn(escontext, -1,
										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
										 errmsg("malformed array literal: \"%s\"", str),
										 errdetail("Unexpected array element.")));
							parse_state = SET_ELEM_STARTED;
						}
					}
					break;
			}
			/* When an item just ended, the outer loop steps past its terminator. */
			if (!itemdone)
				ptr++;
		}
		/* One more item finished (by a delimiter or by the closing brace). */
		temp[ndim - 1]++;
		ptr++;
	}

	/* only whitespace is allowed after the closing brace */
	while (*ptr)
	{
		if (!set_isspace(*ptr++))
			ereturn(escontext, -1,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("malformed array literal: \"%s\"", str),
					 errdetail("Junk after closing right brace.")));
	}

	/* special case for an empty set: dim[] keeps its zeroes */
	if (empty_array)
		return 0;

	for (i = 0; i < ndim; ++i)
		dim[i] = temp[i];

	return ndim;
}

bool    Array_nulls = true;

/*
 * ReadSetStr
 *	 Second pass over the text form of a set: splits it into items, strips
 *	 quoting, and converts each item with the element type's input function.
 *
 * arrayStr is modified in place: each item is de-quoted/de-escaped and
 * NUL-terminated inside the buffer before being handed to inputproc.
 * origStr is the untouched input and is used only in error messages.
 *
 * values[] and nulls[] are caller-allocated and must have nitems entries;
 * nitems must be the count computed by SetCount() for the same string.
 * On success *hasnulls tells whether any item was NULL and *nbytes is the
 * total (aligned) data size of the non-null values.
 *
 * An unquoted, unescaped item spelling NULL (any case) is a NULL element
 * when Array_nulls is true.
 *
 * Returns true on success.  On failure an error is thrown, or, if escontext
 * is an ErrorSaveContext, the error is saved there and false is returned.
 */
static bool
ReadSetStr(char   *arrayStr,
            const char *origStr,
            int     nitems,
            int     ndim,
            int     *dim,
            FmgrInfo *inputproc,
            Oid     typioparam,
            int32   typmod,
            char    typdelim,
            int     typlen,
            bool    typbyval,
            char    typalign,
            Datum   *values,
            bool    *nulls,
            bool    *hasnulls,
            int32   *nbytes,
            Node    *escontext)
{
	int			i;
	char	   *srcptr;
	bool		in_quotes = false;
	bool		eoArray = false;
	bool		hasnull;
	int32		totbytes;
	int			indx = 0;

	/* Initialize is-null markers to true */
	memset(nulls, true, nitems * sizeof(bool));

	/*
	 * We have to remove " and \ characters to create a clean item value to
	 * pass to the datatype input routine.  We overwrite each item value
	 * in-place within arrayStr to do this.  srcptr is the current scan point,
	 * and dstptr is where we are copying to.
	 *
	 * We also want to suppress leading and trailing unquoted whitespace. We
	 * use the leadingspace flag to suppress leading space.  Trailing space is
	 * tracked by using dstendptr to point to the last significant output
	 * character.
	 *
	 * The error checking in this routine is mostly pro-forma, since we expect
	 * that SetCount() already validated the string.  So we don't bother
	 * with errdetail messages.
	 */
	srcptr = arrayStr;
	while (!eoArray)
	{
		bool		itemdone = false;
		bool		leadingspace = true;
		bool		hasquoting = false;
		bool		isnull_item;
		char	   *itemstart;
		char	   *dstptr;
		char	   *dstendptr;

		itemstart = dstptr = dstendptr = srcptr;

		while (!itemdone)
		{
			switch (*srcptr)
			{
				case '\0':
					/* Signal a premature end of the string */
					ereturn(escontext, false,
							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
							 errmsg("malformed array literal: \"%s\"",
									origStr)));
					break;
				case '\\':
					/* Skip backslash, copy next character as-is. */
					srcptr++;
					if (*srcptr == '\0')
						ereturn(escontext, false,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"",
										origStr)));

					/*
					 * The escaped character must be consumed here.  If it
					 * were left for the next loop iteration it would be
					 * interpreted as syntax (quote, brace, delimiter), and
					 * this pass would split items differently than
					 * SetCount(), which skips it.
					 */
					*dstptr++ = *srcptr++;
					/* Treat the escaped character as non-whitespace */
					leadingspace = false;
					dstendptr = dstptr;
					hasquoting = true;	/* can't be a NULL marker */
					break;
				case '"':
					in_quotes = !in_quotes;
					if (in_quotes)
						leadingspace = false;
					else
					{
						/*
						 * Advance dstendptr when we exit in_quotes; this
						 * saves having to do it in all the other in_quotes
						 * cases.
						 */
						dstendptr = dstptr;
					}
					hasquoting = true;	/* can't be a NULL marker */
					srcptr++;
					break;
				case '{':
					/* The opening brace is syntax unless it is quoted. */
					if (!in_quotes)
						srcptr++;
					else
						*dstptr++ = *srcptr++;
					break;
				case '}':
					/* An unquoted closing brace ends the last item. */
					if (!in_quotes)
					{
						eoArray = itemdone = true;
						srcptr++;
					}
					else
						*dstptr++ = *srcptr++;
					break;
				default:
					if (in_quotes)
						*dstptr++ = *srcptr++;
					else if (*srcptr == typdelim)
					{
						itemdone = true;
						srcptr++;
					}
					else if (set_isspace(*srcptr))
					{
						/*
						 * If leading space, drop it immediately.  Else, copy
						 * but don't advance dstendptr.
						 */
						if (leadingspace)
							srcptr++;
						else
							*dstptr++ = *srcptr++;
					}
					else
					{
						*dstptr++ = *srcptr++;
						leadingspace = false;
						dstendptr = dstptr;
					}
					break;
			}
		}

		Assert(dstptr < srcptr);
		*dstendptr = '\0';

		/*
		 * Never write past the caller's arrays, even if this pass somehow
		 * finds more items than SetCount() reported.
		 */
		if (indx >= nitems)
			ereturn(escontext, false,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("malformed array literal: \"%s\"",
							origStr)));

		isnull_item = (Array_nulls && !hasquoting &&
					   pg_strcasecmp(itemstart, "NULL") == 0);

		if (!isnull_item)
			elog(INFO,"line[%04d] indx:%d itemstart:%s, typioparam:%d,typmod:%d",__LINE__,indx,itemstart, typioparam,typmod);

		/* A NULL item is passed to the input function as a NULL string. */
		if (!InputFunctionCallSafe(inputproc,
								   isnull_item ? NULL : itemstart,
								   typioparam, typmod,
								   escontext,
								   &values[indx]))
			return false;
		nulls[indx] = isnull_item;
		indx++;
	}

	/*
	 * Check for nulls, compute total data space needed
	 */
	hasnull = false;
	totbytes = 0;
	elog(INFO,"line[%04d] nitems=%d, dim=%d",__LINE__,nitems,*dim);
	for (i = 0; i < nitems; i++)
	{
		if (nulls[i])
		{
			hasnull = true;
			continue;
		}

		/* let's just make sure data is not toasted */
		if (typlen == -1)
			values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
		totbytes = att_addlength_datum(totbytes, typlen, values[i]);
		totbytes = att_align_nominal(totbytes, typalign);

		elog(INFO,"line[%04d] total bytes: %d",__LINE__,totbytes);
		/* check for overflow of total request */
		if (!AllocSizeIsValid(totbytes))
			ereturn(escontext, false,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("array size exceed the maximum allowed (%d)",
					(int) MaxAllocSize)));
	}
	*hasnulls = hasnull;
	*nbytes = totbytes;

	elog(INFO,"LINE[%04d] total bytes: %d, hasnulls:%d",__LINE__, totbytes, *hasnulls);
	return true;
}

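/*
 * NOTE: the description below is not followed by any function in this file;
 * it appears to be left over from the code this file was adapted from.
 *
 * Copy datum to *dest and return total space used (including align padding)
 *
 * Caller must have handled case of NULL element
 */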


//imitate array_in.
Datum
set_in_out_test(PG_FUNCTION_ARGS)
{
	char	   *string = PG_GETARG_CSTRING(0);		/* external form */
	Oid			element_type = PG_GETARG_OID(1);	/* type of an array * element */
	int32		typmod = PG_GETARG_INT32(2);	    /* typmod for array elements */

    Node	   *escontext = fcinfo->context;
    int         typlen; 
    bool        typbyval;
    char        typalign;
    char    	typdelim   = ','; 
    Oid         typioparam;
    char        *string_save,
                *p;                
    int     nitems;
	Datum	   *dataPtr;
	bool	   *nullsPtr;
    bool        hasnulls;
    int32       nbytes;
    int32       dataoffset;
    SetType   *retval;
    int     dim;
    int     ndim;
    SetMetaState *my_extra;
    int capacity =64;
	bits8	*nullmap;
	
	/*
	 * We arrange to look up info about element type, including its input
	 * conversion proc, only once per series of calls, assuming the element
	 * type doesn't change underneath us.
	 */	
	my_extra	= (SetMetaState *) fcinfo->flinfo->fn_extra;
	if (my_extra == NULL)
	{
        elog(INFO,"line[%04d] my_extra == NULL looped",__LINE__);

		fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt
													,sizeof(SetMetaState));
		my_extra	= (SetMetaState *) fcinfo->flinfo->fn_extra;
		my_extra->element_type	= ~element_type;
	}

	if (my_extra->element_type != element_type)
	{
        elog(INFO,"line[%04d] my_extra->element_type != element_type looped",__LINE__);
		/*
		 * Get info about element type, including its input conversion proc
		 */
		get_type_io_data(element_type,IOFunc_input,
						 &my_extra->typlen,&my_extra->typbyval,
						 &my_extra->typalign, &my_extra->typdelim,
						 &my_extra->typioparam,&my_extra->typiofunc);
		fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
					fcinfo->flinfo->fn_mcxt);
		my_extra->element_type = element_type; 
	}
	typlen		= my_extra->typlen;
	typbyval	= my_extra->typbyval;
	typalign	= my_extra->typalign; 
	typdelim	= my_extra->typdelim; 
	typioparam	= my_extra->typioparam;

	elog(INFO,"line[%04d]: typlen: %d, typbyval:%d typalign %c typdelim %c typioparam %d"
						,__LINE__,typlen,typbyval, typalign, typdelim, typioparam);

    ndim    = SetCount(string,&dim,typdelim, escontext);
	nitems  = dim;

	elog(INFO,"line[%04d]: ndim:%d	dim:%d	nitems:%d",__LINE__,ndim,dim, nitems);
	
	/* Empty set? */
	if (nitems == 0)
	{
		// PG_RETURN_SETTYPE_P(construct_empty_set(element_type));    
		elog(INFO,"line %04d empty set. now leaving",__LINE__);
		PG_RETURN_BOOL(true);
	}

    dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
	nullsPtr = (bool *) palloc(nitems * sizeof(bool));

	/* Make a modifiable copy of the input */
	string_save = pstrdup(string);

	/*
	 * If the input string starts with dimension info, read and use that.
	 * Otherwise, we require the input to be in curly-brace style, and we
	 * prescan the input to determine dimensions.
	 *
	 * Dimension info takes the form of one or more [n] or [m:n] items. The
	 * outer loop iterates once per dimension item.
	 */
	p = string_save;
	
	elog(INFO,"line %d, function oid : %d",__LINE__,*(&my_extra->proc.fn_oid));
	Assert(nitems == dim);
    if (!ReadSetStr(p, string,
					  nitems, ndim, &dim,
					  &my_extra->proc, typioparam, typmod,
					  typdelim,
					  typlen, typbyval, typalign,
					  dataPtr, nullsPtr,
					  &hasnulls, &nbytes, escontext))
	{
		elog(INFO,"LINE[%04d] ReadSetStr FALSE, nbytes %d, hasnulls %d dim:%d",__LINE__,nbytes,hasnulls,dim);
	    PG_RETURN_BOOL(false);
	}
	elog(INFO,"LINE[%04d] ReadSetStr OK, nbytes %d, hasnulls %d dim:%d",__LINE__,nbytes,hasnulls,dim);

	Assert(ndim == 1);          // or deal with ndim = 0;

	// compute init size needed initial.
	if (hasnulls)
	{
		dataoffset	= SET_OVERHEAD_WITHNULLS(capacity, nitems);
        
		if (capacity < nitems)
            capacity =  nitems + 7 - (nitems + 7) % 8;

        nbytes  = capacity * typlen;
		nbytes 		+=  dataoffset;

		elog(INFO,"LINE[%04d] capacity:%d nitems:%d allocated bytes: %d SET_OVERHEAD_WITHNULLS :%ld"
						,__LINE__,capacity, nitems,nbytes,SET_OVERHEAD_WITHNULLS(capacity, nitems));
	}
	else
	{
		//init size no nulls
		dataoffset	= 0;
        if (capacity < nitems)
            capacity =  nitems + 7 - (nitems + 7) % 8;

        nbytes  = capacity * typlen;
		nbytes		+= SET_OVERHEAD_NONULLS(capacity); 

        elog(INFO,"LINE[%04d] SET_OVERHEAD_NONULLS: %ld allocated bytes: %d"
							,__LINE__,SET_OVERHEAD_NONULLS(capacity), nbytes);
	}

	//allocate bytes and set some SetType property.
	retval = (SetType *) palloc0(nbytes);
	SET_VARSIZE(retval, nbytes);
	retval->capacity	= capacity;
	retval->dataoffset 	= dataoffset;
	retval->nelements	= 0;
	retval->elemtype	= element_type;
	
	elog(INFO,"LINE[%04d] SetType meta info: retval->nelements:%d, retval->capacity:%d,retval->elemtype:%d,retval->dataoffset:%d"
			,__LINE__,retval->nelements, retval->capacity,retval->elemtype,retval->dataoffset);
	
	//copy nullsPtr value to retval.
	CopySetNulls(retval,dataPtr,nullsPtr,nitems,typlen,typbyval,typalign,true);    

	switch(element_type) 
	{
		case 23:
			// retval	= set_add_int32(retval, dataPtr,&nitems);
			for(int i = 0; i < nitems; i++)
			{
				set_add_int32(retval, dataPtr[i]);
			}		
			break;
		case 20:
			for(int i = 0; i < nitems; i++)
			{
				set_add_int64(retval, dataPtr[i]);
			}		
			break;
		case 1114:
			for(int i = 0; i < nitems; i++)
			{
				set_add_timestamp(retval, dataPtr[i]);
			}
			break;	

		case 1186:
			for(int i = 0; i < nitems; i++)
			{
				set_add_interval(retval, dataPtr[i]);
			}
			elog(INFO,"now retval should have %d interval element oid:%d", retval->nelements,retval->elemtype);
			break;	
		case 2950:
			for(int i = 0; i < nitems; i++)
			{
				set_add_uuid(retval, dataPtr[i]);
			}
			elog(INFO,"now retval should have %d interval element oid:%d", retval->nelements,retval->elemtype);
			break;	

		default:
			elog(INFO,"line[%04d] now only support x type",__LINE__);
	}

    elog(INFO,"LINE[%04d] SetType meta info: retval->nelements:%d, retval->capacity:%d,retval->elemtype:%d,retval->dataoffset:%d"
			,__LINE__,retval->nelements, retval->capacity,retval->elemtype,retval->dataoffset);

	//set out test.
	char	*generic_setout;
	generic_setout	= SetOut(retval);
	elog(INFO,"line[%04d]: set output:%s",__LINE__,generic_setout);

	pfree(dataPtr);
	pfree(nullsPtr);
	// PG_RETURN_ARRAYTYPE_P(retval);
    PG_RETURN_BOOL(true);
}

/*
 * CopySetNulls --- imitates CopyArrayEls, but records only the null flags.
 *
 * Writes one bit per input item into the set's null bitmap: bit i is set
 * when item i is NOT null and left clear when it is null.  Whole bytes are
 * written, lowest bit first; a final partial byte is written as well.
 * No element values are copied and nothing is freed here.
 *
 * set: set object (with header fields already filled in)
 * nulls: array of is-null flags (can be NULL if no nulls)
 * nitems: number of flags to record
 * values, typlen, typbyval, typalign, freedata: currently unused; kept so
 *		the signature stays parallel to CopyArrayEls.
 *
 * If the set has no null bitmap (dataoffset == 0) nothing is written, and a
 * null item raises an error.
 */
void
CopySetNulls(SetType *set,
			 Datum *values,
			 bool *nulls,
			 int nitems,
			 int typlen,
			 bool typbyval,
			 char typalign,
			 bool freedata)
{
	bits8	   *bitmap = SET_NULLBITMAP(set);
	int			bitval = 0;		/* bits collected for the current byte */
	int			bitmask = 1;	/* bit that belongs to the current item */
	int			i;

	for (i = 0; i < nitems; i++)
	{
		if (nulls && nulls[i])
		{
			if (!bitmap)		/* shouldn't happen */
				elog(ERROR, "null array element where not supported");
			/* bitmap bit stays 0 */
		}
		else
		{
			bitval |= bitmask;
		}

		if (bitmap)
		{
			bitmask <<= 1;
			if (bitmask == 0x100)
			{
				/* a full byte has been collected: flush it and start over */
				*bitmap++ = bitval;
				bitval = 0;
				bitmask = 1;
			}
		}
	}

	/* flush the trailing, partially filled byte */
	if (bitmap && bitmask != 1)
		*bitmap = bitval;
}

/*
 * set_add_int32 --- insert an int4 value into the set, unless already there.
 *
 * The home slot is hash_bytes_uint32(value) % capacity; occupied slots that
 * hold a different value are skipped in strides of HASHSET_STEP.  A slot is
 * occupied when its bit is set in the occupancy bitmap.  nelements is
 * incremented only when a new value is stored.
 *
 * The set is never enlarged here.  A set without slots, or one in which the
 * probe sequence finds no free slot, raises an error rather than dividing
 * by zero or probing forever.
 *
 * Returns "set".
 */
SetType *
set_add_int32(SetType *set, Datum dvalue)
{
	char	   *bitmap = SET_BITMAP(set);
	int32	   *values = (int32 *) SET_DATA_PTR(set);	/* slots hold raw int32 */
	int32		value = DatumGetInt32(dvalue);
	uint32		capacity;
	uint32		position;
	uint32		probes;

	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset with no capacity");

	capacity = (uint32) set->capacity;
	position = hash_bytes_uint32((uint32) value) % capacity;

	/* After "capacity" probes the sequence can only revisit slots. */
	for (probes = 0; probes < capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		/* The slot is already used - maybe it's the same value? */
		if (bitmap[byte] & (0x01 << bit))
		{
			/* Same value, we're done */
			if (values[position] == value)
				return set;

			position = (position + HASHSET_STEP) % capacity;
			continue;
		}

		/* Found an empty spot, before hitting the value first */
		bitmap[byte] |= (0x01 << bit);
		values[position] = value;
		set->nelements++;
		return set;
	}

	elog(ERROR, "hashset is full: no free slot found after %u probes", capacity);
	return set;					/* keep compiler quiet */
}

/*
 * set_add_int64 --- insert an int8 value into the set, unless already there.
 *
 * The home slot is hashint8(value) % capacity; occupied slots that hold a
 * different value (per int8eq) are skipped in strides of HASHSET_STEP.
 * Slots are Datum-sized and store dvalue as given.  nelements is
 * incremented only when a new value is stored.
 *
 * The set is never enlarged here.  A set without slots, or one in which the
 * probe sequence finds no free slot, raises an error rather than dividing
 * by zero or probing forever.
 *
 * Returns "set".
 */
SetType *
set_add_int64(SetType *set, Datum dvalue)
{
	char	   *bitmap = SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		capacity;
	uint32		position;
	uint32		probes;

	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset with no capacity");

	capacity = (uint32) set->capacity;
	position = DatumGetUInt32(DirectFunctionCall1(hashint8, dvalue)) % capacity;

	/* After "capacity" probes the sequence can only revisit slots. */
	for (probes = 0; probes < capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		/* The slot is already used - maybe it's the same value? */
		if (bitmap[byte] & (0x01 << bit))
		{
			/* Same value, we're done */
			if (DatumGetBool(DirectFunctionCall2(int8eq, datums[position], dvalue)))
				return set;

			position = (position + HASHSET_STEP) % capacity;
			continue;
		}

		/* Found an empty spot, before hitting the value first */
		bitmap[byte] |= (0x01 << bit);
		datums[position] = dvalue;
		set->nelements++;
		return set;
	}

	elog(ERROR, "hashset is full: no free slot found after %u probes", capacity);
	return set;					/* keep compiler quiet */
}

/*
 * set_add_timestamp --- insert a timestamp into the set, unless already there.
 *
 * The home slot is timestamp_hash(value) % capacity; occupied slots that
 * hold a different value (per timestamp_eq) are skipped in strides of
 * HASHSET_STEP.  Slots are Datum-sized and store dvalue as given.
 * nelements is incremented only when a new value is stored.
 *
 * The set is never enlarged here.  A set without slots, or one in which the
 * probe sequence finds no free slot, raises an error rather than dividing
 * by zero or probing forever.
 *
 * Returns "set".
 */
SetType *
set_add_timestamp(SetType *set, Datum dvalue)
{
	char	   *bitmap = SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		capacity;
	uint32		position;
	uint32		probes;

	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset with no capacity");

	capacity = (uint32) set->capacity;
	position = DatumGetUInt32(DirectFunctionCall1(timestamp_hash, dvalue)) % capacity;

	/* After "capacity" probes the sequence can only revisit slots. */
	for (probes = 0; probes < capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		/* The slot is already used - maybe it's the same value? */
		if (bitmap[byte] & (0x01 << bit))
		{
			/* Same value, we're done */
			if (DatumGetBool(DirectFunctionCall2(timestamp_eq, datums[position], dvalue)))
				return set;

			position = (position + HASHSET_STEP) % capacity;
			continue;
		}

		/* Found an empty spot, before hitting the value first */
		bitmap[byte] |= (0x01 << bit);
		datums[position] = dvalue;
		set->nelements++;
		return set;
	}

	elog(ERROR, "hashset is full: no free slot found after %u probes", capacity);
	return set;					/* keep compiler quiet */
}

/*
 * set_add_uuid --- insert a uuid into the set, unless already there.
 *
 * The home slot is uuid_hash(value) % capacity; occupied slots that hold a
 * different value (per uuid_eq) are skipped in strides of HASHSET_STEP.
 * nelements is incremented only when a new value is stored.
 *
 * Slots are Datum-sized and store dvalue as given.
 * NOTE(review): if uuid Datums are pointers, the slot then references the
 * caller's memory instead of holding a copy --- confirm the intended
 * lifetime before the set is stored anywhere.
 *
 * The set is never enlarged here.  A set without slots, or one in which the
 * probe sequence finds no free slot, raises an error rather than dividing
 * by zero or probing forever.
 *
 * Returns "set".
 */
SetType *set_add_uuid(SetType *set, Datum dvalue)
{
	char	   *bitmap = SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		capacity;
	uint32		position;
	uint32		probes;

	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset with no capacity");

	capacity = (uint32) set->capacity;
	position = DatumGetUInt32(DirectFunctionCall1(uuid_hash, dvalue)) % capacity;

	/* After "capacity" probes the sequence can only revisit slots. */
	for (probes = 0; probes < capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		/* The slot is already used - maybe it's the same value? */
		if (bitmap[byte] & (0x01 << bit))
		{
			/* Same value, we're done */
			if (DatumGetBool(DirectFunctionCall2(uuid_eq, datums[position], dvalue)))
				return set;

			position = (position + HASHSET_STEP) % capacity;
			continue;
		}

		/* Found an empty spot, before hitting the value first */
		bitmap[byte] |= (0x01 << bit);
		datums[position] = dvalue;
		set->nelements++;
		return set;
	}

	elog(ERROR, "hashset is full: no free slot found after %u probes", capacity);
	return set;					/* keep compiler quiet */
}

/*
 * set_add_interval --- insert an interval into the set, unless already there.
 *
 * The home slot is interval_hash(value) % capacity; occupied slots that
 * hold a different value (per interval_eq) are skipped in strides of
 * HASHSET_STEP.  nelements is incremented only when a new value is stored.
 *
 * Slots are Datum-sized and store dvalue as given.
 * NOTE(review): if interval Datums are pointers, the slot then references
 * the caller's memory instead of holding a copy --- confirm the intended
 * lifetime before the set is stored anywhere.
 *
 * The set is never enlarged here.  A set without slots, or one in which the
 * probe sequence finds no free slot, raises an error rather than dividing
 * by zero or probing forever.
 *
 * Returns "set".
 */
SetType *
set_add_interval(SetType *set, Datum dvalue)
{
	char	   *bitmap = SET_BITMAP(set);
	Datum	   *datums = (Datum *) SET_DATA_PTR(set);
	uint32		capacity;
	uint32		position;
	uint32		probes;

	if (set->capacity <= 0)
		elog(ERROR, "cannot add an element to a hashset with no capacity");

	capacity = (uint32) set->capacity;
	position = DatumGetUInt32(DirectFunctionCall1(interval_hash, dvalue)) % capacity;

	/* After "capacity" probes the sequence can only revisit slots. */
	for (probes = 0; probes < capacity; probes++)
	{
		int			byte = (position / 8);
		int			bit = (position % 8);

		/* The slot is already used - maybe it's the same value? */
		if (bitmap[byte] & (0x01 << bit))
		{
			/* Same value, we're done */
			if (DatumGetBool(DirectFunctionCall2(interval_eq, datums[position], dvalue)))
				return set;

			position = (position + HASHSET_STEP) % capacity;
			continue;
		}

		/* Found an empty spot, before hitting the value first */
		bitmap[byte] |= (0x01 << bit);
		datums[position] = dvalue;
		set->nelements++;
		return set;
	}

	elog(ERROR, "hashset is full: no free slot found after %u probes", capacity);
	return set;					/* keep compiler quiet */
}

/*
 * construct_empty_set --- build a header-only set of the given element type
 *
 * The result is a zero-filled SetType with no slots: capacity 0, no null
 * bitmap (dataoffset 0), and its varlena size set to sizeof(SetType).
 */
SetType *
construct_empty_set(Oid elmtype)
{
	Size		len = sizeof(SetType);
	SetType    *empty = (SetType *) palloc0(len);

	SET_VARSIZE(empty, len);
	empty->elemtype = elmtype;
	empty->dataoffset = 0;
	empty->capacity = 0;

	return empty;
}


/*
 * SetOut --- produce the external text form of a set: "{v1,v2,...}"
 *
 * Slots are scanned in storage order (0 .. capacity-1); a slot contributes
 * to the output only when its bit is set in the set's bitmap.  Values are
 * separated by a single comma with no whitespace.
 *
 * Supported element types, identified by type OID:
 *		23		int4		(slots are read as an int32 array)
 *		20		int8		(slots are read as a Datum array)
 *		1114	timestamp	(Datum array, formatted by timestamp_out)
 *		1186	interval	(Datum array, formatted by interval_out)
 *		2950	uuid		(Datum array, formatted by uuid_out)
 *
 * For any other element type the result is "{}", whatever the set holds.
 *
 * Returns the string built in a StringInfo buffer.
 */
char *
SetOut(SetType *retval)
{
	StringInfoData str;
	char	   *bitmap;
	int32	   *int4_values;
	Datum	   *datums;
	int			i;

	/* Initialize the StringInfo buffer and open the set literal */
	initStringInfo(&str);
	appendStringInfoChar(&str, '{');

	bitmap = SET_BITMAP(retval);

	switch (retval->elemtype)
	{
		case 23:				/* int4 */
		case 20:				/* int8 */
		case 1114:				/* timestamp */
		case 1186:				/* interval */
		case 2950:				/* uuid */
			break;

		default:
			/* No output routine for this element type: emit an empty set */
			appendStringInfoChar(&str, '}');
			return str.data;
	}

	/* The same slot area is viewed as int32[] for int4, Datum[] otherwise */
	int4_values = (int32 *) SET_DATA_PTR(retval);
	datums = (Datum *) SET_DATA_PTR(retval);

	for (i = 0; i < retval->capacity; i++)
	{
		/* Skip slots whose bit is not set in the bitmap */
		if ((bitmap[i / 8] & (0x01 << (i % 8))) == 0)
			continue;

		/* The buffer holds only "{" until the first value is written */
		if (str.len > 1)
			appendStringInfoChar(&str, ',');

		switch (retval->elemtype)
		{
			case 23:
				appendStringInfo(&str, "%d", int4_values[i]);
				break;

			case 20:

				/*
				 * "%ld" is wrong wherever long is 32 bits wide; print via
				 * long long so the format matches on every platform.
				 */
				appendStringInfo(&str, "%lld",
								 (long long) DatumGetInt64(datums[i]));
				break;

			case 1114:
				appendStringInfo(&str, "%s",
								 DatumGetCString(DirectFunctionCall1(timestamp_out,
																	 datums[i])));
				break;

			case 1186:
				appendStringInfo(&str, "%s",
								 DatumGetCString(DirectFunctionCall1(interval_out,
																	 datums[i])));
				break;

			case 2950:
				appendStringInfo(&str, "%s",
								 DatumGetCString(DirectFunctionCall1(uuid_out,
																	 datums[i])));
				break;

			default:
				/* not reachable: unsupported types returned above */
				break;
		}
	}

	/* Append the closing brace for the output hashset string */
	appendStringInfoChar(&str, '}');
	return str.data;
}

Reply via email to