Re: Do we want a hashset type?

jian he Thu, 22 Jun 2023 23:41:07 -0700

I played around with array_func.c;
much of the code can be reused for a multiset data type.
For now I imagine a multiset as something like a one-dimensional array (nested
multisets are somewhat beyond my imagination...).

* A standard varlena array has the following internal structure:
 *   <vl_len_> - standard varlena header word
 *   <ndim> - number of dimensions of the array
 *   <dataoffset> - offset to stored data, or 0 if no nulls bitmap
 *   <elemtype> - element type OID
 *   <dimensions> - length of each array axis (C array of int)
 *   <lower bnds> - lower boundary of each dimension (C array of int)
 *   <null bitmap> - bitmap showing locations of nulls (OPTIONAL)
 *   <actual data> - whatever is the stored data

In a set/multiset we don't need {ndim, lower bnds}, since there is only one
dimension, and we also don't need subscripting.
So for a set we can have the following layout:
*  int32 vl_len_; /* varlena header (do not touch directly!) */
*  int32       capacity; /* # of capacity */
*  int32 dataoffset; /* offset to data, or 0 if no bitmap */
*  int32 nelements; /* number of items added to the hashset */
*  Oid elemtype; /* element type OID */
 *  <null bitmap> - bitmap showing locations of nulls (OPTIONAL)
 *  <bitmap> - bitmap showing whether each slot is empty or not (I am not sure
about this part)
 *  <actual data> - whatever is the stored data

many of the code in array_func.c can be reused.
array_isspace     ==> set_isspace
ArrayMetaState  ==> SetMetastate
ArrayCount         ==> SetCount (similar to ArrayCount return the dimension
of set, should be zero (empty set) or one)
ArrayParseState ==> SetParseState
ReadArrayStr     ==>  ReadSetStr

Attached is a demo that uses code from array_func.c to parse a cstring; it has
an effect similar to array_in.
For multiset_in, the set type's input function: if duplicates do not need to be
removed, then multiset_in would work just like array_in, so more code can be
copied from array_func.c.
But if uniqueness is required, then we first need palloc0(capacity * datum size
(per type)) and then put each valid value into a specific slot?

On Fri, Jun 23, 2023 at 6:27 AM Tomas Vondra <tomas.von...@enterprisedb.com>
wrote:

> On 6/22/23 19:52, Joel Jacobson wrote:
> > On Tue, Jun 20, 2023, at 14:10, Tomas Vondra wrote:
> >> This is also what the SQL standard does for multisets - there's SQL:20nn
> >> draft at http://www.wiscorp.com/SQLStandards.html, and the <member
> >> predicate> section (p. 475) explains how this should work with NULL.
> >
> > I've looked again at the paper you mentioned and found something
> intriguing
> > in section 2.6 (b). I'm a bit puzzled about this: why would we want to
> return
> > null when we're certain it's not null but just doesn't have any elements?
> >
> > In the same vein, it says, "If it has more than one element, an
> exception is
> > raised." Makes sense to me, but what about when there are no elements at
> all?
> > Why not raise an exception in that case too?
> >
> > The ELEMENT function is designed to do one simple thing: return the
> element of
> > a multiset if the multiset has only 1 element. This seems very similar
> to how
> > our INTO STRICT operates, right?
> >
>
> I agree this looks a bit weird, but that's what I mentioned - this is an
> initial proposal, outlining the idea. Inevitably some of the stuff
> will get reworked or just left out of the final version. It's useful
> mostly to explain the motivation / goal.
>
> I believe that's the case here - I don't think the ELEMENT got into the
> standard at all, and the NULL rules for the MEMBER OF clause seem not to
> have these strange bits.
>
> > The SQL:20nn seems to still be in draft form, and I can't help but
> wonder if we
> > should propose a bit of an improvement here:
> >
> > "If it doesn't have exactly one element, an exception is raised."
> >
> > Meaning, it would raise an exception both if there are more elements,
> > or zero elements (no elements).
> >
> > I think this would make the semantics more intuitive and less surprising.
> >
>
> Well, the simple truth is the draft is freely available, but you'd need
> to buy the final version. It doesn't mean it's still being worked on or
> that no SQL standard was released since then. In fact, SQL 2023 was
> released a couple weeks ago [1].
>
> It'd be interesting to know the version that actually got into the SQL
> standard (if at all), but I don't have access to the standard yet.
>
> regards
>
>
> [1] https://www.iso.org/standard/76584.html
>
> --
> Tomas Vondra
> EnterpriseDB: http://www.enterprisedb.com
> The Enterprise PostgreSQL Company
>

-- 
 I recommend David Deutsch's <<The Beginning of Infinity>>

  Jian

/*
gcc -I/home/jian/postgres/2023_05_25_beta5421/include/server -fPIC -c /home/jian/Desktop/regress_pgsql/set.c
gcc -shared  -o /home/jian/Desktop/regress_pgsql/set.so /home/jian/Desktop/regress_pgsql/set.o

CREATE OR REPLACE FUNCTION set_in_test(cstring,int, int) RETURNS BOOL SET search_path from current
        AS '/home/jian/Desktop/regress_pgsql/set', 'set_in_test'
        LANGUAGE C IMMUTABLE;
select  set_in_test('{1,2,3,NULL,NULL, NULL}',23,-1);

*/
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
// #include "utils/array.h"
#include "utils/numeric.h"
#include "funcapi.h"
#include "utils/lsyscache.h"
#include "utils/fmgrprotos.h"
#include "common/hashfn.h"

/* Magic block identifying this file as a loadable PostgreSQL module. */
PG_MODULE_MAGIC;

/* Declare set_in_test as a version-1 (fmgr) SQL-callable C function. */
PG_FUNCTION_INFO_V1(set_in_test);
/* Integer division of a by b rounded up; intended for positive operands. */
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))

/*
 * Fixed-size header of the experimental set type.  It is modelled on the
 * array header, but has no dimension count, dimension lengths or lower
 * bounds; it carries a capacity and an element count instead.
 *
 * Sets are varlena objects, so must meet the varlena convention that
 * the first int32 of the object contains the total object size in bytes.
 * Be sure to use VARSIZE() and SET_VARSIZE() to access it, though!
 *
 * The header is followed by the bitmap area and then the element data; the
 * SET_OVERHEAD_* and SET_DATA_* macros define the offsets actually used.
 */
typedef struct SetType
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int32       capacity;		/* number of slots the set was sized for */
	int32		dataoffset;		/* offset to data, or 0 if no null bitmap */
	int32		nelements;		/* number of items added to the hashset */
	Oid			elemtype;		/* element type OID */
} SetType;

/*
 * Accessors for the fixed SetType header.  In every macro "a" is an
 * expression yielding a SetType pointer; it is always parenthesized in the
 * expansion so that arguments such as "p + 1" work.
 */
#define SET_SIZE(a)				VARSIZE(a)
#define SET_ITEM(a)				((a)->nelements)
#define SET_CAPACITY(a)			((a)->capacity)
#define SET_HASNULL(a)			((a)->dataoffset != 0)
#define SET_ELEMTYPE(a)			((a)->elemtype)

/*
 * Bytes preceding the element data when there is no null bitmap: the header
 * plus one bit per slot of capacity, rounded up to a MAXALIGN boundary.
 */
#define SET_OVERHEAD_NONULLS(capacity) \
		MAXALIGN(sizeof(SetType) + CEIL_DIV(capacity, 8))

/*
 * Same as above, with room for one additional bit per stored element for
 * the null bitmap.
 */
#define SET_OVERHEAD_WITHNULLS(capacity,nelements) \
		MAXALIGN(sizeof(SetType) + CEIL_DIV(capacity, 8) + \
				 ((nelements) + 7) / 8)

/*
 * Offset of the element data from the start of the set.  A nonzero
 * dataoffset in the header is used as-is; zero means "no null bitmap".
 */
#define SET_DATA_OFFSET(a) \
		(SET_HASNULL(a) ? (a)->dataoffset : SET_OVERHEAD_NONULLS(SET_CAPACITY(a)))

/*
 * Pointer to the null bitmap, or NULL if the set has none.
 *
 * NOTE(review): the bitmap is placed (nelements + 7) / 8 bytes after the
 * header, which matches neither "null bitmap first" nor "slot bitmap first"
 * exactly.  It stays inside the overhead area only while
 * nelements <= capacity; the intended layout should be confirmed.
 */
#define SET_NULLBITMAP(a) \
		(SET_HASNULL(a) ? \
		 (bits8 *) (((char *) (a)) + sizeof(SetType) + \
					(((a)->nelements) + 7) / 8) \
		 : (bits8 *) NULL)

/*
 * Returns a pointer to the actual set element data.
 */
#define SET_DATA_PTR(a) \
		(((char *) (a)) + SET_DATA_OFFSET(a))

/*
 * Per-call-site cache of element type I/O information.  set_in_test keeps
 * one of these in fn_extra and refills it with get_type_io_data() whenever
 * the requested element type differs from the cached one.
 */
typedef struct SetMetaState
{
	Oid			element_type;	/* element type the cached data belongs to */
	int16		typlen;			/* element type's length */
	bool		typbyval;		/* is the element type pass-by-value? */
	char		typalign;		/* element type's alignment code */
	char		typdelim;		/* element delimiter character */
	Oid			typioparam;		/* parameter to pass to the I/O function */
	Oid			typiofunc;		/* OID of the I/O function looked up */
	FmgrInfo	proc;			/* fmgr lookup data for typiofunc */
} SetMetaState;

/* Forward declarations of file-local helpers. */

/* Prescan a set literal: validate syntax and count its elements. */
static int
SetCount(const char *str, int *dim, char typdelim, Node *escontext); 

/* Test one element's bit in a null bitmap. */
static bool
set_get_isnull(const bits8 *nullbitmap, int offset);


/*
 * Report whether one element of a set is marked NULL.
 *
 * nullbitmap: the set's null bitmap, or NULL when the set has none
 * offset: 0-based position of the element
 *
 * A set bit means the element is present; a clear bit means it is NULL.
 */
static bool
set_get_isnull(const bits8 *nullbitmap, int offset)
{
	int			byteno;
	int			bitno;

	/* Without a bitmap, no element is considered NULL. */
	if (nullbitmap == NULL)
		return false;

	byteno = offset / 8;
	bitno = offset % 8;

	return (nullbitmap[byteno] & (1 << bitno)) == 0;
}

/*
 * Store one non-null datum at *dest.
 *
 * Returns the number of bytes consumed at dest, including the padding
 * needed to satisfy typalign.  NULL elements must be filtered out by the
 * caller.
 */
static int
SetCastAndSet(Datum src,
				int typlen,
				bool typbyval,
				char typalign,
				char *dest)
{
	int			rawlen;

	if (typlen > 0)
	{
		/* fixed-length type: either stored by value or copied verbatim */
		rawlen = typlen;
		if (typbyval)
			store_att_byval(dest, src, typlen);
		else
			memmove(dest, DatumGetPointer(src), rawlen);
	}
	else
	{
		/* variable-length type: ask the datum itself how long it is */
		Assert(!typbyval);
		rawlen = att_addlength_datum(0, typlen, src);
		memmove(dest, DatumGetPointer(src), rawlen);
	}

	return att_align_nominal(rawlen, typalign);
}

/*
 * Fill the data area (and null bitmap, if any) of a set object from a
 * temporary array of Datums.
 *
 * array: set object whose header fields are already filled in
 * values: Datums to copy
 * nulls: is-null flags, or NULL when there are no nulls
 * nitems: number of entries in values[] / nulls[]
 * typlen, typbyval, typalign: storage properties of the element type
 * freedata: if true and the element type is pass-by-reference, pfree each
 * source value once it has been copied
 *
 * Varlena inputs must already be detoasted by the caller, because the
 * space for the set has been allocated before this function runs.
 */
void
CopySetEls(SetType *array,
			 Datum *values,
			 bool *nulls,
			 int nitems,
			 int typlen,
			 bool typbyval,
			 char typalign,
			 bool freedata)
{
	char	   *dst = SET_DATA_PTR(array);
	bits8	   *nullmap = SET_NULLBITMAP(array);
	int			curbyte = 0;	/* bitmap byte being assembled */
	int			curbit = 0;		/* next bit position within curbyte */
	bool		release = freedata && !typbyval;
	int			idx;

	for (idx = 0; idx < nitems; idx++)
	{
		bool		isnull = (nulls != NULL && nulls[idx]);

		if (isnull && nullmap == NULL)	/* shouldn't happen */
			elog(ERROR, "null array element where not supported");

		if (!isnull)
		{
			/* mark the element as present and append its data */
			curbyte |= (1 << curbit);
			dst += SetCastAndSet(values[idx], typlen, typbyval, typalign, dst);
			if (release)
				pfree(DatumGetPointer(values[idx]));
		}

		/* flush the bitmap byte once all eight bits have been decided */
		if (nullmap != NULL && ++curbit == 8)
		{
			*nullmap++ = curbyte;
			curbyte = 0;
			curbit = 0;
		}
	}

	/* write out a trailing, partially filled bitmap byte */
	if (nullmap != NULL && curbit != 0)
		*nullmap = curbyte;
}


/*
 * set_isspace() --- a non-locale-dependent isspace()
 *
 * The set literal parser must treat the same bytes as whitespace no matter
 * which locale is active, so the traditional ASCII whitespace characters
 * are hard-wired here instead of calling isspace().
 */
static bool
set_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}


/*
 * ReadSetStr :
 *	 parses the array string pointed to by "arrayStr" and converts the values
 *	 to internal format.  Unspecified elements are initialized to nulls.
 *	 The array dimensions must already have been determined.
 *
 * Inputs:
 *	arrayStr: the string to parse.
 *			  CAUTION: the contents of "arrayStr" will be modified!
 *	origStr: the unmodified input string, used only in error messages.
 *	nitems: total number of array elements, as already determined.
 *	ndim: number of array dimensions
 *	dim[]: array axis lengths
 *	inputproc: type-specific input procedure for element datatype.
 *	typioparam, typmod: auxiliary values to pass to inputproc.
 *	typdelim: the value delimiter (type-specific).
 *	typlen, typbyval, typalign: storage parameters of element datatype.
 *
 * Outputs:
 *	values[]: filled with converted data values.
 *	nulls[]: filled with is-null markers.
 *	*hasnulls: set true iff there are any null elements.
 *	*nbytes: set to total size of data area needed (including alignment
 *		padding but not including array header overhead).
 *	*escontext: if this points to an ErrorSaveContext, details of
 *		any error are reported there.
 *
 * Result:
 *	true for success, false for failure (if escontext is provided).
 *
 * Note that values[] and nulls[] are allocated by the caller, and must have
 * nitems elements.
 */

/*
 * Parser states used by SetCount() while scanning a set literal.  Each state
 * records what was seen last, which determines which characters may legally
 * follow.
 */
typedef enum
{
	SET_NO_LEVEL,				/* initial state: no '{' seen yet */
	SET_LEVEL_STARTED,			/* just saw an unquoted '{' */
	SET_ELEM_STARTED,			/* inside an unquoted element */
	SET_ELEM_COMPLETED,			/* tested by SetCount but never assigned in
								 * this file */
	SET_QUOTED_ELEM_STARTED,	/* inside a double-quoted element */
	SET_QUOTED_ELEM_COMPLETED,	/* just saw the closing double quote */
	SET_ELEM_DELIMITED,			/* just saw a delimiter after an element */
	SET_LEVEL_COMPLETED,		/* just saw an unquoted '}' */
	SET_LEVEL_DELIMITED			/* just saw a delimiter after a '}' */
} SetParseState;

/*
 * SetCount
 *	 Prescans a set literal: validates its syntax and counts its elements.
 *
 * Returns number of dimensions as function result: 0 for an empty set,
 * otherwise the deepest brace nesting seen, which is capped at MAXDIM (1)
 * because deeper nesting is rejected.  The axis lengths are returned in
 * dim[], which must be of size MAXDIM.
 *
 * If we detect an error, fill *escontext with error details and return -1
 * (unless escontext isn't provided, in which case errors will be thrown).
 *
 * The logic is the array dimension counter with the nesting limit reduced
 * to one level, so the error messages still speak of arrays.
 */
/* Limit the parser to a single nesting level, overriding any prior value. */
#undef  MAXDIM
#define MAXDIM  1

static	int
SetCount(const char *str, int *dim, char typdelim, Node *escontext)
{
	int			nest_level = 0,
				i;
	int			ndim = 1,
				temp[MAXDIM],
				nelems[MAXDIM],
				nelems_last[MAXDIM];
	bool		in_quotes = false;
	bool		eoArray = false;
	bool		empty_array = true;
	const char *ptr;
	SetParseState parse_state = SET_NO_LEVEL;

	/* Reset the per-level counters and the caller's output array. */
	for (i = 0; i < MAXDIM; ++i)
	{
		temp[i] = dim[i] = nelems_last[i] = 0;
		nelems[i] = 1;
	}

	ptr = str;
	/* Outer loop: one iteration per item, until the closing brace. */
	while (!eoArray)
	{
		bool		itemdone = false;

		/* Inner loop: one iteration per input character of the item. */
		while (!itemdone)
		{
			/* Any started element means the set is not empty. */
			if (parse_state == SET_ELEM_STARTED ||
				parse_state == SET_QUOTED_ELEM_STARTED)
				empty_array = false;

			switch (*ptr)
			{
				case '\0':
					/* Signal a premature end of the string */
					ereturn(escontext, -1,
							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
							 errmsg("malformed array literal: \"%s\"", str),
							 errdetail("Unexpected end of input.")));
				case '\\':

					/*
					 * An escape must be after a level start, after an element
					 * start, or after an element delimiter. In any case we
					 * now must be past an element start.
					 */
					if (parse_state != SET_LEVEL_STARTED &&
						parse_state != SET_ELEM_STARTED &&
						parse_state != SET_QUOTED_ELEM_STARTED &&
						parse_state != SET_ELEM_DELIMITED)
						ereturn(escontext, -1,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"", str),
								 errdetail("Unexpected \"%c\" character.",
										   '\\')));
					if (parse_state != SET_QUOTED_ELEM_STARTED)
						parse_state = SET_ELEM_STARTED;
					/* skip the escaped character */
					if (*(ptr + 1))
						ptr++;
					else
						ereturn(escontext, -1,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"", str),
								 errdetail("Unexpected end of input.")));
					break;
				case '"':

					/*
					 * A quote must be after a level start, after a quoted
					 * element start, or after an element delimiter. In any
					 * case we now must be past an element start.
					 */
					if (parse_state != SET_LEVEL_STARTED &&
						parse_state != SET_QUOTED_ELEM_STARTED &&
						parse_state != SET_ELEM_DELIMITED)
						ereturn(escontext, -1,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"", str),
								 errdetail("Unexpected array element.")));
					in_quotes = !in_quotes;
					if (in_quotes)
						parse_state = SET_QUOTED_ELEM_STARTED;
					else
						parse_state = SET_QUOTED_ELEM_COMPLETED;
					break;
				case '{':
					if (!in_quotes)
					{
						/*
						 * A left brace can occur if no nesting has occurred
						 * yet, after a level start, or after a level
						 * delimiter.
						 */
						if (parse_state != SET_NO_LEVEL &&
							parse_state != SET_LEVEL_STARTED &&
							parse_state != SET_LEVEL_DELIMITED)
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Unexpected \"%c\" character.",
											   '{')));
						parse_state = SET_LEVEL_STARTED;
						/* With MAXDIM == 1 this rejects any nested brace. */
						if (nest_level >= MAXDIM)
							ereturn(escontext, -1,
									(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
									 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
											nest_level + 1, MAXDIM)));
						temp[nest_level] = 0;
						nest_level++;
						if (ndim < nest_level)
							ndim = nest_level;
					}
					break;
				case '}':
					if (!in_quotes)
					{
						/*
						 * A right brace can occur after an element start, an
						 * element completion, a quoted element completion, or
						 * a level completion.
						 */
						if (parse_state != SET_ELEM_STARTED &&
							parse_state != SET_ELEM_COMPLETED &&
							parse_state != SET_QUOTED_ELEM_COMPLETED &&
							parse_state != SET_LEVEL_COMPLETED &&
							!(nest_level == 1 && parse_state == SET_LEVEL_STARTED))
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Unexpected \"%c\" character.",
											   '}')));
						parse_state = SET_LEVEL_COMPLETED;
						if (nest_level == 0)
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Unmatched \"%c\" character.", '}')));
						nest_level--;

						/* Sibling levels must all contain the same number of items. */
						if (nelems_last[nest_level] != 0 &&
							nelems[nest_level] != nelems_last[nest_level])
							ereturn(escontext, -1,
									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
									 errmsg("malformed array literal: \"%s\"", str),
									 errdetail("Multidimensional arrays must have "
											   "sub-arrays with matching "
											   "dimensions.")));
						nelems_last[nest_level] = nelems[nest_level];
						nelems[nest_level] = 1;
						/* Closing the outermost brace ends the scan. */
						if (nest_level == 0)
							eoArray = itemdone = true;
						else
						{
							/*
							 * We don't set itemdone here; see comments in
							 * ReadSetStr
							 */
							temp[nest_level - 1]++;
						}
					}
					break;
				default:
					if (!in_quotes)
					{
						if (*ptr == typdelim)
						{
							/*
							 * Delimiters can occur after an element start, an
							 * element completion, a quoted element
							 * completion, or a level completion.
							 */
							if (parse_state != SET_ELEM_STARTED &&
								parse_state != SET_ELEM_COMPLETED &&
								parse_state != SET_QUOTED_ELEM_COMPLETED &&
								parse_state != SET_LEVEL_COMPLETED)
								ereturn(escontext, -1,
										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
										 errmsg("malformed array literal: \"%s\"", str),
										 errdetail("Unexpected \"%c\" character.",
												   typdelim)));
							if (parse_state == SET_LEVEL_COMPLETED)
								parse_state = SET_LEVEL_DELIMITED;
							else
								parse_state = SET_ELEM_DELIMITED;
							itemdone = true;
							nelems[nest_level - 1]++;
						}
						else if (!set_isspace(*ptr))
						{
							/*
							 * Other non-space characters must be after a
							 * level start, after an element start, or after
							 * an element delimiter. In any case we now must
							 * be past an element start.
							 */
							if (parse_state != SET_LEVEL_STARTED &&
								parse_state != SET_ELEM_STARTED &&
								parse_state != SET_ELEM_DELIMITED)
								ereturn(escontext, -1,
										(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
										 errmsg("malformed array literal: \"%s\"", str),
										 errdetail("Unexpected array element.")));
							parse_state = SET_ELEM_STARTED;
						}
					}
					break;
			}
			if (!itemdone)
				ptr++;
		}
		/* Count the item that just ended, then step past its terminator. */
		temp[ndim - 1]++;
		ptr++;
	}

	/* only whitespace is allowed after the closing brace */
	while (*ptr)
	{
		if (!set_isspace(*ptr++))
			ereturn(escontext, -1,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("malformed array literal: \"%s\"", str),
					 errdetail("Junk after closing right brace.")));
	}

	/* special case for an empty array */
	if (empty_array)
		return 0;

	/* Hand the per-dimension item counts back to the caller. */
	for (i = 0; i < ndim; ++i)
		dim[i] = temp[i];

	return ndim;
}

/*
 * When true, an unquoted element spelled NULL (in any case) is read as a
 * null element instead of being passed to the element input function.
 *
 * NOTE(review): this global shares its name with the backend's own
 * Array_nulls variable; confirm whether a private name is wanted.
 */
bool    Array_nulls = true;

/*
 * ReadSetStr
 *	 Parses the set literal in "arrayStr" and converts each element to its
 *	 internal form by calling the element type's input function.
 *
 * Inputs:
 *	arrayStr: the string to parse.
 *			  CAUTION: the contents of "arrayStr" will be modified!
 *	origStr: the unmodified input string, used only in error messages.
 *	nitems: number of elements, as already determined by SetCount().
 *	ndim, dim: dimension info from SetCount() (dim[0] is only logged).
 *	inputproc: type-specific input procedure for element datatype.
 *	typioparam, typmod: auxiliary values to pass to inputproc.
 *	typdelim: the value delimiter (type-specific).
 *	typlen, typbyval, typalign: storage parameters of element datatype.
 *
 * Outputs:
 *	values[]: filled with converted data values.
 *	nulls[]: filled with is-null markers.
 *	*hasnulls: set true iff there are any null elements.
 *	*nbytes: set to total size of data area needed (including alignment
 *		padding but not including header overhead).
 *	*escontext: if this points to an ErrorSaveContext, details of
 *		any error are reported there.
 *
 * Result:
 *	true for success, false for failure (if escontext is provided).
 *
 * values[] and nulls[] are allocated by the caller and must have nitems
 * elements.  An input that contains more elements than nitems is reported
 * as malformed instead of being written past the end of those arrays.
 */
static bool
ReadSetStr(char   *arrayStr,
            const char *origStr,
            int     nitems,
            int     ndim,
            int     *dim,
            FmgrInfo *inputproc,
            Oid     typioparam,
            int32   typmod,
            char    typdelim,
            int     typlen,
            bool    typbyval,
            char    typalign,
            Datum   *values,
            bool    *nulls,
            bool    *hasnulls,
            int32   *nbytes,
            Node    *escontext)
{
	int			i;
	char	   *srcptr;
	bool		in_quotes = false;
	bool		eoArray = false;
	bool		hasnull;
	int32		totbytes;
	int			indx = 0;

	/* An empty set has nothing to convert and needs no data space. */
	if (nitems <= 0)
	{
		*hasnulls = false;
		*nbytes = 0;
		return true;
	}

	/* Initialize is-null markers to true */
	memset(nulls, true, nitems * sizeof(bool));

	/*
	 * We have to remove " and \ characters to create a clean item value to
	 * pass to the datatype input routine.  We overwrite each item value
	 * in-place within arrayStr to do this.  srcptr is the current scan point,
	 * and dstptr is where we are copying to.
	 *
	 * We also want to suppress leading and trailing unquoted whitespace. We
	 * use the leadingspace flag to suppress leading space.  Trailing space is
	 * tracked by using dstendptr to point to the last significant output
	 * character.
	 *
	 * The error checking in this routine is mostly pro-forma, since we expect
	 * that SetCount() already validated the string.  So we don't bother
	 * with errdetail messages.
	 */
	srcptr = arrayStr;
	while (!eoArray)
	{
		bool		itemdone = false;
		bool		leadingspace = true;
		bool		hasquoting = false;
		char	   *itemstart;
		char	   *dstptr;
		char	   *dstendptr;

		itemstart = dstptr = dstendptr = srcptr;

		while (!itemdone)
		{
			switch (*srcptr)
			{
				case '\0':
					/* Signal a premature end of the string */
					ereturn(escontext, false,
							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
							 errmsg("malformed array literal: \"%s\"",
									origStr)));
					break;
				case '\\':
					/* Skip backslash, copy next character as-is. */
					srcptr++;
					if (*srcptr == '\0')
						ereturn(escontext, false,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed array literal: \"%s\"",
										origStr)));

					/*
					 * The escaped character must be copied here rather than
					 * being handed back to the switch, or an escaped quote,
					 * brace or delimiter would be treated as syntax and the
					 * item count would no longer match SetCount().
					 */
					*dstptr++ = *srcptr++;
					/* Treat the escaped character as non-whitespace */
					leadingspace = false;
					dstendptr = dstptr;
					hasquoting = true;	/* can't be a NULL marker */
					break;
				case '"':
					in_quotes = !in_quotes;
					if (in_quotes)
						leadingspace = false;
					else
					{
						/*
						 * Advance dstendptr when we exit in_quotes; this
						 * saves having to do it in all the other in_quotes
						 * cases.
						 */
						dstendptr = dstptr;
					}
					hasquoting = true;	/* can't be a NULL marker */
					srcptr++;
					break;
				case '{':
					/* The unquoted opening brace carries no data. */
					if (!in_quotes)
						srcptr++;
					else
						*dstptr++ = *srcptr++;
					break;
				case '}':
					/* The unquoted closing brace ends item and set. */
					if (!in_quotes)
					{
						eoArray = itemdone = true;
						srcptr++;
					}
					else
						*dstptr++ = *srcptr++;
					break;
				default:
					if (in_quotes)
						*dstptr++ = *srcptr++;
					else if (*srcptr == typdelim)
					{
						itemdone = true;
						srcptr++;
					}
					else if (set_isspace(*srcptr))
					{
						/*
						 * If leading space, drop it immediately.  Else, copy
						 * but don't advance dstendptr.
						 */
						if (leadingspace)
							srcptr++;
						else
							*dstptr++ = *srcptr++;
					}
					else
					{
						*dstptr++ = *srcptr++;
						leadingspace = false;
						dstendptr = dstptr;
					}
					break;
			}
		}

		Assert(dstptr < srcptr);
		*dstendptr = '\0';

		/* Never store beyond the arrays the caller sized from SetCount(). */
		if (indx >= nitems)
			ereturn(escontext, false,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("malformed array literal: \"%s\"",
							origStr)));

		if (Array_nulls && !hasquoting &&
			pg_strcasecmp(itemstart, "NULL") == 0)
		{
			/* it's a NULL item */
			if (!InputFunctionCallSafe(inputproc, NULL,
									   typioparam, typmod,
									   escontext,
									   &values[indx]))
				return false;
			nulls[indx] = true;
			indx++;
		}
		else
		{
			elog(INFO,"line %d indx:%d itemstart:%s, typioparam:%d,typmod:%d",__LINE__,indx,itemstart, typioparam,typmod);
			if (!InputFunctionCallSafe(inputproc, itemstart,
									   typioparam, typmod,
									   escontext,
									   &values[indx]))
				return false;
			nulls[indx] = false;
			indx++;
		}
	}

	/*
	 * Check for nulls, compute total data space needed
	 */
	hasnull = false;
	totbytes = 0;
	elog(INFO,"line[%03d]nitems=%d, dim=%d",__LINE__,nitems,*dim);
	for (i = 0; i < nitems; i++)
	{
		if (nulls[i])
			hasnull = true;
		else
		{
			/* let's just make sure data is not toasted */
			if (typlen == -1)
				values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
			totbytes = att_addlength_datum(totbytes, typlen, values[i]);
			totbytes = att_align_nominal(totbytes, typalign);

			elog(INFO,"line[%03d] total bytes: %d",__LINE__,totbytes);
			/* check for overflow of total request */
			if (!AllocSizeIsValid(totbytes))
				ereturn(escontext, false,
						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
						 errmsg("array size exceed the maximum allowed (%d)",
								(int) MaxAllocSize)));
		}
	}
	*hasnulls = hasnull;
	*nbytes = totbytes;

	elog(INFO,"total bytes: %d, hasnulls:%d",totbytes, *hasnulls);
	return true;
}

//imitate array_in.
Datum
set_in_test(PG_FUNCTION_ARGS)
{
	char	   *string = PG_GETARG_CSTRING(0);		/* external form */
	Oid			element_type = PG_GETARG_OID(1);	/* type of an array * element */
	int32		typmod = PG_GETARG_INT32(2);	/* typmod for array elements */

    Node	   *escontext = fcinfo->context;
    int         typlen; 
    bool        typbyval;
    char        typalign;
    char    	typdelim   = ','; 
    Oid         typioparam;
    char        *string_save,
                *p;                
    int     nitems;
	Datum	   *dataPtr;
	bool	   *nullsPtr;
    bool        hasnulls;
    int32       nbytes;
    int32       dataoffset;
    SetType   *retval;
    int     dim;
    int     ndim;
    SetMetaState *my_extra;
	
	/*
	 * We arrange to look up info about element type, including its input
	 * conversion proc, only once per series of calls, assuming the element
	 * type doesn't change underneath us.
	 */	
	my_extra	= (SetMetaState *) fcinfo->flinfo->fn_extra;
	if (my_extra == NULL)
	{
		fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt
													,sizeof(SetMetaState));
		my_extra	= (SetMetaState *) fcinfo->flinfo->fn_extra;
		my_extra->element_type	= ~element_type;
	}

	if (my_extra->element_type != element_type)
	{
		/*
		 * Get info about element type, including its input conversion proc
		 */
		get_type_io_data(element_type,IOFunc_input,
						 &my_extra->typlen,&my_extra->typbyval,
						 &my_extra->typalign, &my_extra->typdelim,
						 &my_extra->typioparam,&my_extra->typiofunc);
		fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
					fcinfo->flinfo->fn_mcxt);
		my_extra->element_type = element_type; 
	}
	typlen		= my_extra->typlen;
	typbyval	= my_extra->typbyval;
	typalign	= my_extra->typalign; 
	typdelim	= my_extra->typdelim; 
	typioparam	= my_extra->typioparam;

	elog(INFO,"typlen: %d, typbyval:%d typalign %c typdelim %c typioparam %d"
						,typlen,typbyval, typalign, typdelim, typioparam);

    ndim    = SetCount(string,&dim,typdelim, escontext);
    // ndim eith zero or 1. So plan accordingly.
    nitems  = dim;
    dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
	nullsPtr = (bool *) palloc(nitems * sizeof(bool));

	/* Make a modifiable copy of the input */
	string_save = pstrdup(string);

	/*
	 * If the input string starts with dimension info, read and use that.
	 * Otherwise, we require the input to be in curly-brace style, and we
	 * prescan the input to determine dimensions.
	 *
	 * Dimension info takes the form of one or more [n] or [m:n] items. The
	 * outer loop iterates once per dimension item.
	 */
	p = string_save;
	
	elog(INFO,"line %d, function oid : %d",__LINE__,*(&my_extra->proc.fn_oid));
	Assert(nitems == dim);
    if (!ReadSetStr(p, string,
					  nitems, ndim, &dim,
					  &my_extra->proc, typioparam, typmod,
					  typdelim,
					  typlen, typbyval, typalign,
					  dataPtr, nullsPtr,
					  &hasnulls, &nbytes, escontext))
	{
		elog(INFO,"ReadSetStr FALSE, nbytes %d, hasnulls %d dim:%d",nbytes,hasnulls,dim);
	    PG_RETURN_BOOL(false);
	}
	elog(INFO,"ReadSetStr OK, nbytes %d, hasnulls %d dim:%d",nbytes,hasnulls,dim);

    int capacity =64;

	if (hasnulls)
	{
		dataoffset	= SET_OVERHEAD_WITHNULLS(capacity, nitems);
        elog(INFO,"line %3d SET_OVERHEAD_WITHNULLS :%ld",__LINE__,SET_OVERHEAD_WITHNULLS(capacity, nitems));
		nbytes 		+=  dataoffset;
	}
	else
	{
		dataoffset	= 0;
        elog(INFO,"LINE[%03d] SET_OVERHEAD_NONULLS: %ld",__LINE__,SET_OVERHEAD_NONULLS(capacity));
        elog(INFO,"line %03d test %ld", __LINE__,MAXALIGN(sizeof(SetType) + CEIL_DIV(capacity, 8)));
		nbytes		+= SET_OVERHEAD_NONULLS(capacity); 
	}
	retval	= (SetType *) palloc0(nbytes);
	SET_VARSIZE(retval, nbytes);
	elog(INFO,"LINE[%03d] nbytes = %d",__LINE__,nbytes);
	Assert(ndim == 1);          // or deal with ndim = 0;
	retval->nelements			= nitems;
	retval->dataoffset		    = dataoffset;
	retval->capacity		    = capacity;

	/*
	 * This comes from the array's pg_type.typelem (which points to the base
	 * data type's pg_type.oid) and stores system oids in user tables. This
	 * oid must be preserved by binary upgrades.
	 */
	retval->elemtype	= element_type;
	elog(INFO,"line [%03d] returning array retval->elemtype=:%d",__LINE__,SET_ELEMTYPE(retval));

    for (int j = 0; j < retval->nelements; j ++)
    {
        elog(INFO,"[%03d] test nullptr: %d", j,*(nullsPtr+j));
    }
	CopySetEls(retval,
				 dataPtr, nullsPtr, nitems,
				 typlen, typbyval, typalign,
				 true);

    if(SET_HASNULL(retval))
    {
        for (int j = 0; j < retval->nelements; j ++)
        {
            elog(INFO," line[%03d]: [%03d] after copy: %d",__LINE__,j,*(SET_NULLBITMAP(retval)+j));
        }
    }

	pfree(dataPtr);
	pfree(nullsPtr);
	// PG_RETURN_ARRAYTYPE_P(retval);
    PG_RETURN_BOOL(true);
}

Re: Do we want a hashset type?

Reply via email to