commit 184a18a36774c268dd63e2b3c1e970de86eedbbc
Author: Joel Jakobsson <joel@compiler.org>
Date:   Fri Jun 16 08:52:09 2023 +0200

    Add customizable params to int4hashset() and collision count function
    
    This commit enhances int4hashset() by introducing adjustable capacity,
    load, and growth factors, providing flexibility for performance optimization.
    
    Also added is a new function, hashset_collisions(), to report collision
    counts, aiding in performance tuning.
    
    Aggregate functions are renamed to hashset_agg() for consistency with
    array_agg() and range_agg().
    
    A new test file, test/sql/benchmark.sql, is added for evaluating the
    performance of hash functions. It's not run automatically by
    make installcheck.
    
    The adjustable parameters and the naive hash function are useful for testing
    and performance comparison. However, to keep things simple and streamlined
    for users, these features are likely to be removed in the final release,
    emphasizing the use of well-optimized default settings.
    
    SQL-function indentation is also adjusted to align with the PostgreSQL
    source repo, improving readability.
    
    In the benchmark results below, it was a bit surprising that the naive
    hash function had no collisions, but that only held true when the input
    elements were sequential integers. When tested with random integers,
    all three hash functions caused collisions.
    
    Timing results are not statistically significant; the purpose is just to
    give an idea of the execution times.
    
    *** Elements in sequence 1..100000
    - Testing default hash function (Jenkins/lookup3)
    psql:test/sql/benchmark.sql:23: NOTICE:  hashset_count: 100000
    psql:test/sql/benchmark.sql:23: NOTICE:  hashset_capacity: 262144
    psql:test/sql/benchmark.sql:23: NOTICE:  hashset_collisions: 31195
    DO
    Time: 1342.564 ms (00:01.343)
    - Testing Murmurhash32
    psql:test/sql/benchmark.sql:40: NOTICE:  hashset_count: 100000
    psql:test/sql/benchmark.sql:40: NOTICE:  hashset_capacity: 262144
    psql:test/sql/benchmark.sql:40: NOTICE:  hashset_collisions: 30879
    DO
    Time: 1297.823 ms (00:01.298)
    - Testing naive hash function
    psql:test/sql/benchmark.sql:57: NOTICE:  hashset_count: 100000
    psql:test/sql/benchmark.sql:57: NOTICE:  hashset_capacity: 262144
    psql:test/sql/benchmark.sql:57: NOTICE:  hashset_collisions: 0
    DO
    Time: 1400.936 ms (00:01.401)
    *** Testing 100000 random ints
     setseed
    ---------
    
    (1 row)
    
    Time: 3.591 ms
    - Testing default hash function (Jenkins/lookup3)
    psql:test/sql/benchmark.sql:77: NOTICE:  hashset_count: 100000
    psql:test/sql/benchmark.sql:77: NOTICE:  hashset_capacity: 262144
    psql:test/sql/benchmark.sql:77: NOTICE:  hashset_collisions: 30919
    DO
    Time: 1415.497 ms (00:01.415)
     setseed
    ---------
    
    (1 row)
    
    Time: 1.282 ms
    - Testing Murmurhash32
    psql:test/sql/benchmark.sql:95: NOTICE:  hashset_count: 100000
    psql:test/sql/benchmark.sql:95: NOTICE:  hashset_capacity: 262144
    psql:test/sql/benchmark.sql:95: NOTICE:  hashset_collisions: 30812
    DO
    Time: 2079.202 ms (00:02.079)
     setseed
    ---------
    
    (1 row)
    
    Time: 0.122 ms
    - Testing naive hash function
    psql:test/sql/benchmark.sql:113: NOTICE:  hashset_count: 100000
    psql:test/sql/benchmark.sql:113: NOTICE:  hashset_capacity: 262144
    psql:test/sql/benchmark.sql:113: NOTICE:  hashset_collisions: 30822
    DO
    Time: 1613.965 ms (00:01.614)

diff --git a/README.md b/README.md
index 99237df..4a5e5a7 100644
--- a/README.md
+++ b/README.md
@@ -64,19 +64,28 @@ a variable-length type.
 
 ## Functions
 
-- `int4hashset() -> int4hashset`: Initialize an empty int4hashset with no capacity.
-- `int4hashset_with_capacity(int) -> int4hashset`: Initialize an empty int4hashset with given capacity.
+- `int4hashset([capacity int, load_factor float4, growth_factor float4, hashfn_id int4]) -> int4hashset`:
+  Initialize an empty int4hashset with optional parameters.
+    - `capacity` specifies the initial capacity, which is zero by default.
+    - `load_factor` represents the threshold for resizing the hashset and defaults to 0.75.
+    - `growth_factor` is the multiplier for resizing and defaults to 2.0.
+    - `hashfn_id` represents the hash function used.
+        - 1=Jenkins/lookup3 (default)
+        - 2=MurmurHash32
+        - 3=Naive hash function
 - `hashset_add(int4hashset, int) -> int4hashset`: Adds an integer to an int4hashset.
 - `hashset_contains(int4hashset, int) -> boolean`: Checks if an int4hashset contains a given integer.
 - `hashset_merge(int4hashset, int4hashset) -> int4hashset`: Merges two int4hashsets into a new int4hashset.
 - `hashset_to_array(int4hashset) -> int[]`: Converts an int4hashset to an array of integers.
 - `hashset_count(int4hashset) -> bigint`: Returns the number of elements in an int4hashset.
 - `hashset_capacity(int4hashset) -> bigint`: Returns the current capacity of an int4hashset.
+- `hashset_load_factor(int4hashset) -> float4`: Returns the load factor of an int4hashset.
+- `hashset_growth_factor(int4hashset) -> float4`: Returns the growth factor of an int4hashset.
 
 ## Aggregation Functions
 
-- `hashset(int) -> int4hashset`: Aggregate integers into a hashset.
-- `hashset(int4hashset) -> int4hashset`: Aggregate hashsets into a hashset.
+- `hashset_agg(int) -> int4hashset`: Aggregate integers into a hashset.
+- `hashset_agg(int4hashset) -> int4hashset`: Aggregate hashsets into a hashset.
 
 
 ## Operators
diff --git a/hashset--0.0.1.sql b/hashset--0.0.1.sql
index ea559ca..20d019d 100644
--- a/hashset--0.0.1.sql
+++ b/hashset--0.0.1.sql
@@ -5,24 +5,24 @@
 CREATE TYPE int4hashset;
 
 CREATE OR REPLACE FUNCTION int4hashset_in(cstring)
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_in'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS int4hashset
+AS 'hashset', 'int4hashset_in'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OR REPLACE FUNCTION int4hashset_out(int4hashset)
-    RETURNS cstring
-    AS 'hashset', 'int4hashset_out'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS cstring
+AS 'hashset', 'int4hashset_out'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OR REPLACE FUNCTION int4hashset_send(int4hashset)
-    RETURNS bytea
-    AS 'hashset', 'int4hashset_send'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS bytea
+AS 'hashset', 'int4hashset_send'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OR REPLACE FUNCTION int4hashset_recv(internal)
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_recv'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS int4hashset
+AS 'hashset', 'int4hashset_recv'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE TYPE int4hashset (
     INPUT = int4hashset_in,
@@ -37,67 +37,71 @@ CREATE TYPE int4hashset (
  * Hashset Functions
  */
 
-CREATE OR REPLACE FUNCTION int4hashset()
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_init'
-    LANGUAGE C IMMUTABLE;
-
-CREATE OR REPLACE FUNCTION int4hashset_with_capacity(int)
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_init'
-    LANGUAGE C IMMUTABLE;
+CREATE OR REPLACE FUNCTION int4hashset(
+    capacity int DEFAULT 0,
+    load_factor float4 DEFAULT 0.75,
+    growth_factor float4 DEFAULT 2.0,
+    hashfn_id int DEFAULT 1
+)
+RETURNS int4hashset
+AS 'hashset', 'int4hashset_init'
+LANGUAGE C IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION hashset_add(int4hashset, int)
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_add'
-    LANGUAGE C IMMUTABLE;
+RETURNS int4hashset
+AS 'hashset', 'int4hashset_add'
+LANGUAGE C IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION hashset_contains(int4hashset, int)
-    RETURNS bool
-    AS 'hashset', 'int4hashset_contains'
-    LANGUAGE C IMMUTABLE;
+RETURNS bool
+AS 'hashset', 'int4hashset_contains'
+LANGUAGE C IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION hashset_merge(int4hashset, int4hashset)
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_merge'
-    LANGUAGE C IMMUTABLE;
+RETURNS int4hashset
+AS 'hashset', 'int4hashset_merge'
+LANGUAGE C IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION hashset_to_array(int4hashset)
-    RETURNS int[]
-    AS 'hashset', 'int4hashset_to_array'
-    LANGUAGE C IMMUTABLE;
+RETURNS int[]
+AS 'hashset', 'int4hashset_to_array'
+LANGUAGE C IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION hashset_count(int4hashset)
-    RETURNS bigint
-    AS 'hashset', 'int4hashset_count'
-    LANGUAGE C IMMUTABLE;
+RETURNS bigint
+AS 'hashset', 'int4hashset_count'
+LANGUAGE C IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION hashset_capacity(int4hashset)
-    RETURNS bigint
-    AS 'hashset', 'int4hashset_capacity'
-    LANGUAGE C IMMUTABLE;
+RETURNS bigint
+AS 'hashset', 'int4hashset_capacity'
+LANGUAGE C IMMUTABLE;
 
+CREATE OR REPLACE FUNCTION hashset_collisions(int4hashset)
+RETURNS bigint
+AS 'hashset', 'int4hashset_collisions'
+LANGUAGE C IMMUTABLE;
 
 /*
  * Aggregation Functions
  */
 
 CREATE OR REPLACE FUNCTION int4hashset_agg_add(p_pointer internal, p_value int)
-    RETURNS internal
-    AS 'hashset', 'int4hashset_agg_add'
-    LANGUAGE C IMMUTABLE;
+RETURNS internal
+AS 'hashset', 'int4hashset_agg_add'
+LANGUAGE C IMMUTABLE;
     
 CREATE OR REPLACE FUNCTION int4hashset_agg_final(p_pointer internal)
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_agg_final'
-    LANGUAGE C IMMUTABLE;
+RETURNS int4hashset
+AS 'hashset', 'int4hashset_agg_final'
+LANGUAGE C IMMUTABLE;
     
 CREATE OR REPLACE FUNCTION int4hashset_agg_combine(p_pointer internal, p_pointer2 internal)
-    RETURNS internal
-    AS 'hashset', 'int4hashset_agg_combine'
-    LANGUAGE C IMMUTABLE;
+RETURNS internal
+AS 'hashset', 'int4hashset_agg_combine'
+LANGUAGE C IMMUTABLE;
 
-CREATE AGGREGATE hashset(int) (
+CREATE AGGREGATE hashset_agg(int) (
     SFUNC = int4hashset_agg_add,
     STYPE = internal,
     FINALFUNC = int4hashset_agg_final,
@@ -106,21 +110,21 @@ CREATE AGGREGATE hashset(int) (
 );
 
 CREATE OR REPLACE FUNCTION int4hashset_agg_add_set(p_pointer internal, p_value int4hashset)
-    RETURNS internal
-    AS 'hashset', 'int4hashset_agg_add_set'
-    LANGUAGE C IMMUTABLE;
+RETURNS internal
+AS 'hashset', 'int4hashset_agg_add_set'
+LANGUAGE C IMMUTABLE;
     
 CREATE OR REPLACE FUNCTION int4hashset_agg_final(p_pointer internal)
-    RETURNS int4hashset
-    AS 'hashset', 'int4hashset_agg_final'
-    LANGUAGE C IMMUTABLE;
+RETURNS int4hashset
+AS 'hashset', 'int4hashset_agg_final'
+LANGUAGE C IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION int4hashset_agg_combine(p_pointer internal, p_pointer2 internal)
-    RETURNS internal
-    AS 'hashset', 'int4hashset_agg_combine'
-    LANGUAGE C IMMUTABLE;
+RETURNS internal
+AS 'hashset', 'int4hashset_agg_combine'
+LANGUAGE C IMMUTABLE;
 
-CREATE AGGREGATE hashset(int4hashset) (
+CREATE AGGREGATE hashset_agg(int4hashset) (
     SFUNC = int4hashset_agg_add_set,
     STYPE = internal,
     FINALFUNC = int4hashset_agg_final,
@@ -133,9 +137,9 @@ CREATE AGGREGATE hashset(int4hashset) (
  */
 
 CREATE OR REPLACE FUNCTION hashset_equals(int4hashset, int4hashset)
-    RETURNS bool
-    AS 'hashset', 'int4hashset_equals'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS bool
+AS 'hashset', 'int4hashset_equals'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OPERATOR = (
     LEFTARG = int4hashset,
@@ -146,9 +150,9 @@ CREATE OPERATOR = (
 );
 
 CREATE OR REPLACE FUNCTION hashset_neq(int4hashset, int4hashset)
-    RETURNS bool
-    AS 'hashset', 'int4hashset_neq'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS bool
+AS 'hashset', 'int4hashset_neq'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OPERATOR <> (
     LEFTARG = int4hashset,
@@ -166,43 +170,43 @@ CREATE OPERATOR <> (
  */
 
 CREATE OR REPLACE FUNCTION hashset_hash(int4hashset)
-    RETURNS integer
-    AS 'hashset', 'int4hashset_hash'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS integer
+AS 'hashset', 'int4hashset_hash'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OPERATOR CLASS int4hashset_hash_ops
-    DEFAULT FOR TYPE int4hashset USING hash AS
-    OPERATOR 1 = (int4hashset, int4hashset),
-    FUNCTION 1 hashset_hash(int4hashset);
+DEFAULT FOR TYPE int4hashset USING hash AS
+OPERATOR 1 = (int4hashset, int4hashset),
+FUNCTION 1 hashset_hash(int4hashset);
 
 /*
  * Hashset Btree Operators
  */
 
 CREATE OR REPLACE FUNCTION hashset_lt(int4hashset, int4hashset)
-    RETURNS bool
-    AS 'hashset', 'int4hashset_lt'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS bool
+AS 'hashset', 'int4hashset_lt'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OR REPLACE FUNCTION hashset_le(int4hashset, int4hashset)
-    RETURNS boolean
-    AS 'hashset', 'int4hashset_le'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS boolean
+AS 'hashset', 'int4hashset_le'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OR REPLACE FUNCTION hashset_gt(int4hashset, int4hashset)
-    RETURNS boolean
-    AS 'hashset', 'int4hashset_gt'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS boolean
+AS 'hashset', 'int4hashset_gt'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OR REPLACE FUNCTION hashset_ge(int4hashset, int4hashset)
-    RETURNS boolean
-    AS 'hashset', 'int4hashset_ge'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS boolean
+AS 'hashset', 'int4hashset_ge'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OR REPLACE FUNCTION hashset_cmp(int4hashset, int4hashset)
-    RETURNS integer
-    AS 'hashset', 'int4hashset_cmp'
-    LANGUAGE C IMMUTABLE STRICT;
+RETURNS integer
+AS 'hashset', 'int4hashset_cmp'
+LANGUAGE C IMMUTABLE STRICT;
 
 CREATE OPERATOR < (
     PROCEDURE = hashset_lt,
@@ -245,10 +249,10 @@ CREATE OPERATOR >= (
 );
 
 CREATE OPERATOR CLASS int4hashset_btree_ops
-    DEFAULT FOR TYPE int4hashset USING btree AS
-    OPERATOR 1 < (int4hashset, int4hashset),
-    OPERATOR 2 <= (int4hashset, int4hashset),
-    OPERATOR 3 = (int4hashset, int4hashset),
-    OPERATOR 4 >= (int4hashset, int4hashset),
-    OPERATOR 5 > (int4hashset, int4hashset),
-    FUNCTION 1 hashset_cmp(int4hashset, int4hashset);
+DEFAULT FOR TYPE int4hashset USING btree AS
+OPERATOR 1 < (int4hashset, int4hashset),
+OPERATOR 2 <= (int4hashset, int4hashset),
+OPERATOR 3 = (int4hashset, int4hashset),
+OPERATOR 4 >= (int4hashset, int4hashset),
+OPERATOR 5 > (int4hashset, int4hashset),
+FUNCTION 1 hashset_cmp(int4hashset, int4hashset);
diff --git a/hashset.c b/hashset.c
index 569ae91..9a1bd3f 100644
--- a/hashset.c
+++ b/hashset.c
@@ -29,9 +29,12 @@ PG_MODULE_MAGIC;
 typedef struct int4hashset_t {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
 	int32		flags;			/* reserved for future use (versioning, ...) */
-	int32		maxelements;	/* max number of element we have space for */
+	int32		capacity;		/* max number of element we have space for */
 	int32		nelements;		/* number of items added to the hashset */
 	int32		hashfn_id;		/* ID of the hash function used */
+	float4		load_factor;	/* Load factor before triggering resize */
+	float4		growth_factor;	/* Growth factor when resizing the hashset */
+	int32		ncollisions;	/* Number of collisions */
 	char		data[FLEXIBLE_ARRAY_MEMBER];
 } int4hashset_t;
 
@@ -46,6 +49,16 @@ static Datum int32_to_array(FunctionCallInfo fcinfo, int32 * d, int len);
 #define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
 #define HASHSET_STEP 13
 #define JENKINS_LOOKUP3_HASHFN_ID 1
+#define MURMURHASH32_HASHFN_ID 2
+#define NAIVE_HASHFN_ID 3
+
+/*
+ * These defaults should match the SQL function int4hashset()
+ */
+#define DEFAULT_INITIAL_CAPACITY 0
+#define DEFAULT_LOAD_FACTOR 0.75
+#define DEFAULT_GROWTH_FACTOR 2.0
+#define DEFAULT_HASHFN_ID JENKINS_LOOKUP3_HASHFN_ID
 
 PG_FUNCTION_INFO_V1(int4hashset_in);
 PG_FUNCTION_INFO_V1(int4hashset_out);
@@ -57,6 +70,7 @@ PG_FUNCTION_INFO_V1(int4hashset_count);
 PG_FUNCTION_INFO_V1(int4hashset_merge);
 PG_FUNCTION_INFO_V1(int4hashset_init);
 PG_FUNCTION_INFO_V1(int4hashset_capacity);
+PG_FUNCTION_INFO_V1(int4hashset_collisions);
 PG_FUNCTION_INFO_V1(int4hashset_agg_add_set);
 PG_FUNCTION_INFO_V1(int4hashset_agg_add);
 PG_FUNCTION_INFO_V1(int4hashset_agg_final);
@@ -81,6 +95,7 @@ Datum int4hashset_count(PG_FUNCTION_ARGS);
 Datum int4hashset_merge(PG_FUNCTION_ARGS);
 Datum int4hashset_init(PG_FUNCTION_ARGS);
 Datum int4hashset_capacity(PG_FUNCTION_ARGS);
+Datum int4hashset_collisions(PG_FUNCTION_ARGS);
 Datum int4hashset_agg_add(PG_FUNCTION_ARGS);
 Datum int4hashset_agg_add_set(PG_FUNCTION_ARGS);
 Datum int4hashset_agg_final(PG_FUNCTION_ARGS);
@@ -118,23 +133,28 @@ hashset_isspace(char ch)
 }
 
 static int4hashset_t *
-int4hashset_allocate(int maxelements)
+int4hashset_allocate(
+	int capacity,
+	float4 load_factor,
+	float4 growth_factor,
+	int hashfn_id
+)
 {
 	Size			len;
 	int4hashset_t  *set;
 	char		   *ptr;
 
 	/*
-	 * Ensure that maxelements is not divisible by HASHSET_STEP;
+	 * Ensure that capacity is not divisible by HASHSET_STEP;
 	 * i.e. the step size used in hashset_add_element()
 	 * and hashset_contains_element().
 	 */
-	while (maxelements % HASHSET_STEP == 0)
-		maxelements++;
+	while (capacity % HASHSET_STEP == 0)
+		capacity++;
 
 	len = offsetof(int4hashset_t, data);
-	len += CEIL_DIV(maxelements, 8);
-	len += maxelements * sizeof(int32);
+	len += CEIL_DIV(capacity, 8);
+	len += capacity * sizeof(int32);
 
 	ptr = palloc0(len);
 	SET_VARSIZE(ptr, len);
@@ -142,9 +162,11 @@ int4hashset_allocate(int maxelements)
 	set = (int4hashset_t *) ptr;
 
 	set->flags = 0;
-	set->maxelements = maxelements;
+	set->capacity = capacity;
 	set->nelements = 0;
-	set->hashfn_id = JENKINS_LOOKUP3_HASHFN_ID;
+	set->hashfn_id = hashfn_id;
+	set->load_factor = load_factor;
+	set->growth_factor = growth_factor;
 
 	set->flags |= 0;
 
@@ -176,7 +198,12 @@ int4hashset_in(PG_FUNCTION_ARGS)
 	str++;
 
 	/* Initial size based on input length (arbitrary, could be optimized) */
-	set = int4hashset_allocate(len/2);
+	set = int4hashset_allocate(
+		len/2,
+		DEFAULT_LOAD_FACTOR,
+		DEFAULT_GROWTH_FACTOR,
+		DEFAULT_HASHFN_ID
+	);
 
 	while (true)
 	{
@@ -202,7 +229,7 @@ int4hashset_in(PG_FUNCTION_ARGS)
 		}
 
 		/* Add the value to the hashset, resize if needed */
-		if (set->nelements >= set->maxelements)
+		if (set->nelements >= set->capacity)
 		{
 			set = int4hashset_resize(set);
 		}
@@ -261,7 +288,7 @@ int4hashset_out(PG_FUNCTION_ARGS)
 
 	/* Calculate the pointer to the bitmap and values array */
 	bitmap = set->data;
-	values = (int32 *) (set->data + CEIL_DIV(set->maxelements, 8));
+	values = (int32 *) (set->data + CEIL_DIV(set->capacity, 8));
 
 	/* Initialize the StringInfo buffer */
 	initStringInfo(&str);
@@ -270,7 +297,7 @@ int4hashset_out(PG_FUNCTION_ARGS)
 	appendStringInfoChar(&str, '{');
 
 	/* Loop through the elements and append them to the string */
-	for (i = 0; i < set->maxelements; i++)
+	for (i = 0; i < set->capacity; i++)
 	{
 		int byte = i / 8;
 		int bit = i % 8;
@@ -305,7 +332,7 @@ int4hashset_send(PG_FUNCTION_ARGS)
 
 	/* Send the non-data fields */
 	pq_sendint32(&buf, set->flags);
-	pq_sendint32(&buf, set->maxelements);
+	pq_sendint32(&buf, set->capacity);
 	pq_sendint32(&buf, set->nelements);
 	pq_sendint32(&buf, set->hashfn_id);
 
@@ -328,7 +355,7 @@ int4hashset_recv(PG_FUNCTION_ARGS)
 
 	/* Read fields from buffer */
 	int32 flags = pq_getmsgint(buf, 4);
-	int32 maxelements = pq_getmsgint(buf, 4);
+	int32 capacity = pq_getmsgint(buf, 4);
 	int32 nelements = pq_getmsgint(buf, 4);
 	int32 hashfn_id = pq_getmsgint(buf, 4);
 
@@ -349,7 +376,7 @@ int4hashset_recv(PG_FUNCTION_ARGS)
 
 	/* Populate the structure */
 	set->flags = flags;
-	set->maxelements = maxelements;
+	set->capacity = capacity;
 	set->nelements = nelements;
 	set->hashfn_id = hashfn_id;
 	memcpy(set->data, binary_data, data_size);
@@ -376,14 +403,14 @@ int4hashset_to_array(PG_FUNCTION_ARGS)
 	set = (int4hashset_t *) PG_GETARG_INT4HASHSET(0);
 
 	sbitmap = set->data;
-	svalues = (int32 *) (set->data + CEIL_DIV(set->maxelements, 8));
+	svalues = (int32 *) (set->data + CEIL_DIV(set->capacity, 8));
 
 	/* number of values to store in the array */
 	nvalues = set->nelements;
 	values = (int32 *) palloc(sizeof(int32) * nvalues);
 
 	idx = 0;
-	for (i = 0; i < set->maxelements; i++)
+	for (i = 0; i < set->capacity; i++)
 	{
 		int	byte = (i / 8);
 		int	bit = (i % 8);
@@ -426,15 +453,22 @@ static int4hashset_t *
 int4hashset_resize(int4hashset_t * set)
 {
 	int				i;
-	int4hashset_t	*new = int4hashset_allocate(set->maxelements * 2);
+	int4hashset_t	*new;
 	char			*bitmap;
 	int32			*values;
 
+	new = int4hashset_allocate(
+		set->capacity * 2,
+		set->load_factor,
+		set->growth_factor,
+		set->hashfn_id
+	);
+
 	/* Calculate the pointer to the bitmap and values array */
 	bitmap = set->data;
-	values = (int32 *) (set->data + CEIL_DIV(set->maxelements, 8));
+	values = (int32 *) (set->data + CEIL_DIV(set->capacity, 8));
 
-	for (i = 0; i < set->maxelements; i++)
+	for (i = 0; i < set->capacity; i++)
 	{
 		int	byte = (i / 8);
 		int	bit = (i % 8);
@@ -455,12 +489,20 @@ int4hashset_add_element(int4hashset_t *set, int32 value)
 	char   *bitmap;
 	int32  *values;
 
-	if (set->nelements > set->maxelements * 0.75)
+	if (set->nelements > set->capacity * set->load_factor)
 		set = int4hashset_resize(set);
 
 	if (set->hashfn_id == JENKINS_LOOKUP3_HASHFN_ID)
 	{
-		hash = hash_bytes_uint32((uint32) value) % set->maxelements;
+		hash = hash_bytes_uint32((uint32) value) % set->capacity;
+	}
+	else if (set->hashfn_id == MURMURHASH32_HASHFN_ID)
+	{
+		hash = murmurhash32((uint32) value) % set->capacity;
+	}
+	else if (set->hashfn_id == NAIVE_HASHFN_ID)
+	{
+		hash = ((uint32) value * 7691 + 4201) % set->capacity;
 	}
 	else
 	{
@@ -470,7 +512,7 @@ int4hashset_add_element(int4hashset_t *set, int32 value)
 	}
 
 	bitmap = set->data;
-	values = (int32 *) (set->data + CEIL_DIV(set->maxelements, 8));
+	values = (int32 *) (set->data + CEIL_DIV(set->capacity, 8));
 
 	while (true)
 	{
@@ -484,7 +526,10 @@ int4hashset_add_element(int4hashset_t *set, int32 value)
 			if (values[hash] == value)
 				break;
 
-			hash = (hash + HASHSET_STEP) % set->maxelements;
+			/* Increment the collision counter */
+			set->ncollisions++;
+
+			hash = (hash + HASHSET_STEP) % set->capacity;
 			continue;
 		}
 
@@ -512,7 +557,15 @@ int4hashset_contains_element(int4hashset_t *set, int32 value)
 
 	if (set->hashfn_id == JENKINS_LOOKUP3_HASHFN_ID)
 	{
-		hash = hash_bytes_uint32((uint32) value) % set->maxelements;
+		hash = hash_bytes_uint32((uint32) value) % set->capacity;
+	}
+	else if (set->hashfn_id == MURMURHASH32_HASHFN_ID)
+	{
+		hash = murmurhash32((uint32) value) % set->capacity;
+	}
+	else if (set->hashfn_id == NAIVE_HASHFN_ID)
+	{
+		hash = ((uint32) value * 7691 + 4201) % set->capacity;
 	}
 	else
 	{
@@ -522,7 +575,7 @@ int4hashset_contains_element(int4hashset_t *set, int32 value)
 	}
 
 	bitmap = set->data;
-	values = (int32 *) (set->data + CEIL_DIV(set->maxelements, 8));
+	values = (int32 *) (set->data + CEIL_DIV(set->capacity, 8));
 
 	while (true)
 	{
@@ -538,12 +591,12 @@ int4hashset_contains_element(int4hashset_t *set, int32 value)
 			return true;
 
 		/* move to the next element */
-		hash = (hash + HASHSET_STEP) % set->maxelements;
+		hash = (hash + HASHSET_STEP) % set->capacity;
 
 		num_probes++; /* Increment the number of probes */
 
 		/* Check if we have probed all slots */
-		if (num_probes >= set->maxelements)
+		if (num_probes >= set->capacity)
 			return false; /* Avoid infinite loop */
 	}
 }
@@ -563,7 +616,14 @@ int4hashset_add(PG_FUNCTION_ARGS)
 
 	/* if there's no hashset allocated, create it now */
 	if (PG_ARGISNULL(0))
-		set = int4hashset_allocate(64);
+	{
+		set = int4hashset_allocate(
+			DEFAULT_INITIAL_CAPACITY,
+			DEFAULT_LOAD_FACTOR,
+			DEFAULT_GROWTH_FACTOR,
+			DEFAULT_HASHFN_ID
+		);
+	}
 	else
 	{
 		/* make sure we are working with a non-toasted and non-shared copy of the input */
@@ -597,9 +657,9 @@ int4hashset_merge(PG_FUNCTION_ARGS)
 	setb = PG_GETARG_INT4HASHSET(1);
 
 	bitmap = setb->data;
-	values = (int32 *) (setb->data + CEIL_DIV(setb->maxelements, 8));
+	values = (int32 *) (setb->data + CEIL_DIV(setb->capacity, 8));
 
-	for (i = 0; i < setb->maxelements; i++)
+	for (i = 0; i < setb->capacity; i++)
 	{
 		int	byte = (i / 8);
 		int	bit = (i % 8);
@@ -614,19 +674,51 @@ int4hashset_merge(PG_FUNCTION_ARGS)
 Datum
 int4hashset_init(PG_FUNCTION_ARGS)
 {
-	if (PG_NARGS() == 0) {
-		/*
-		 * No initial capacity argument was passed,
-		 * allocate hashset with zero capacity
-		 */
-		PG_RETURN_POINTER(int4hashset_allocate(0));
-	} else {
-		/*
-		 * Initial capacity argument was passed,
-		 * allocate hashset with the specified capacity
-		 */
-		PG_RETURN_POINTER(int4hashset_allocate(PG_GETARG_INT32(0)));
+	int4hashset_t *set;
+	int32 initial_capacity = PG_GETARG_INT32(0);
+	float4 load_factor = PG_GETARG_FLOAT4(1);
+	float4 growth_factor = PG_GETARG_FLOAT4(2);
+	int32 hashfn_id = PG_GETARG_INT32(3);
+
+	/* Validate input arguments */
+	if (!(initial_capacity >= 0))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("initial capacity cannot be negative")));
 	}
+
+	if (!(load_factor > 0.0 && load_factor < 1.0))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("load factor must be between 0.0 and 1.0")));
+	}
+
+	if (!(growth_factor > 1.0))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("growth factor must be greater than 1.0")));
+	}
+
+	if (!(hashfn_id == JENKINS_LOOKUP3_HASHFN_ID ||
+	      hashfn_id == MURMURHASH32_HASHFN_ID ||
+		  hashfn_id == NAIVE_HASHFN_ID))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Invalid hash function ID")));
+	}
+
+	set = int4hashset_allocate(
+		initial_capacity,
+		load_factor,
+		growth_factor,
+		hashfn_id
+	);
+
+	PG_RETURN_POINTER(set);
 }
 
 Datum
@@ -667,7 +759,20 @@ int4hashset_capacity(PG_FUNCTION_ARGS)
 
 	set = (int4hashset_t *) PG_GETARG_POINTER(0);
 
-	PG_RETURN_INT64(set->maxelements);
+	PG_RETURN_INT64(set->capacity);
+}
+
+Datum
+int4hashset_collisions(PG_FUNCTION_ARGS)
+{
+	int4hashset_t	*set;
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	set = PG_GETARG_INT4HASHSET(0);
+
+	PG_RETURN_INT64(set->ncollisions);
 }
 
 Datum
@@ -699,7 +804,12 @@ int4hashset_agg_add(PG_FUNCTION_ARGS)
 	if (PG_ARGISNULL(0))
 	{
 		oldcontext = MemoryContextSwitchTo(aggcontext);
-		state = int4hashset_allocate(64);
+		state = int4hashset_allocate(
+			DEFAULT_INITIAL_CAPACITY,
+			DEFAULT_LOAD_FACTOR,
+			DEFAULT_GROWTH_FACTOR,
+			DEFAULT_HASHFN_ID
+		);
 		MemoryContextSwitchTo(oldcontext);
 	}
 	else
@@ -741,7 +851,12 @@ int4hashset_agg_add_set(PG_FUNCTION_ARGS)
 	if (PG_ARGISNULL(0))
 	{
 		oldcontext = MemoryContextSwitchTo(aggcontext);
-		state = int4hashset_allocate(64);
+		state = int4hashset_allocate(
+			DEFAULT_INITIAL_CAPACITY,
+			DEFAULT_LOAD_FACTOR,
+			DEFAULT_GROWTH_FACTOR,
+			DEFAULT_HASHFN_ID
+		);
 		MemoryContextSwitchTo(oldcontext);
 	}
 	else
@@ -758,9 +873,9 @@ int4hashset_agg_add_set(PG_FUNCTION_ARGS)
 		value = PG_GETARG_INT4HASHSET(1);
 
 		bitmap = value->data;
-		values = (int32 *) (value->data + CEIL_DIV(value->maxelements, 8));
+		values = (int32 *) (value->data + CEIL_DIV(value->capacity, 8));
 
-		for (i = 0; i < value->maxelements; i++)
+		for (i = 0; i < value->capacity; i++)
 		{
 			int	byte = (i / 8);
 			int	bit = (i % 8);
@@ -835,9 +950,9 @@ int4hashset_agg_combine(PG_FUNCTION_ARGS)
 	dst = (int4hashset_t *) PG_GETARG_POINTER(0);
 
 	bitmap = src->data;
-	values = (int32 *) (src->data + CEIL_DIV(src->maxelements, 8));
+	values = (int32 *) (src->data + CEIL_DIV(src->capacity, 8));
 
-	for (i = 0; i < src->maxelements; i++)
+	for (i = 0; i < src->capacity; i++)
 	{
 		int	byte = (i / 8);
 		int	bit = (i % 8);
@@ -868,12 +983,12 @@ int4hashset_equals(PG_FUNCTION_ARGS)
 		PG_RETURN_BOOL(false);
 
 	bitmap_a = a->data;
-	values_a = (int32 *)(a->data + CEIL_DIV(a->maxelements, 8));
+	values_a = (int32 *)(a->data + CEIL_DIV(a->capacity, 8));
 
 	/*
 	 * Check if every element in a is also in b
 	 */
-	for (i = 0; i < a->maxelements; i++)
+	for (i = 0; i < a->capacity; i++)
 	{
 		int byte = (i / 8);
 		int bit = (i % 8);
@@ -918,10 +1033,10 @@ Datum int4hashset_hash(PG_FUNCTION_ARGS)
 
     /* Access the data array */
     char *bitmap = set->data;
-    int32 *values = (int32 *)(set->data + CEIL_DIV(set->maxelements, 8));
+    int32 *values = (int32 *)(set->data + CEIL_DIV(set->capacity, 8));
 
     /* Iterate through all elements */
-    for (int32 i = 0; i < set->maxelements; i++)
+    for (int32 i = 0; i < set->capacity; i++)
     {
         int byte = i / 8;
         int bit = i % 8;
@@ -1010,13 +1125,13 @@ int4hashset_cmp(PG_FUNCTION_ARGS)
 	int i = 0, j = 0;
 
 	bitmap_a = a->data;
-	values_a = (int32 *)(a->data + CEIL_DIV(a->maxelements, 8));
+	values_a = (int32 *)(a->data + CEIL_DIV(a->capacity, 8));
 
 	bitmap_b = b->data;
-	values_b = (int32 *)(b->data + CEIL_DIV(b->maxelements, 8));
+	values_b = (int32 *)(b->data + CEIL_DIV(b->capacity, 8));
 
 	/* Iterate over the elements in each hashset independently */
-	while(i < a->maxelements && j < b->maxelements)
+	while(i < a->capacity && j < b->capacity)
 	{
 		int byte_a = (i / 8);
 		int bit_a = (i % 8);
@@ -1057,9 +1172,9 @@ int4hashset_cmp(PG_FUNCTION_ARGS)
 	 * If all compared elements are equal,
 	 * then compare the remaining elements in the larger hashset
 	 */
-	if (i < a->maxelements)
+	if (i < a->capacity)
 		PG_RETURN_INT32(1);
-	else if (j < b->maxelements)
+	else if (j < b->capacity)
 		PG_RETURN_INT32(-1);
 	else
 		PG_RETURN_INT32(0);
diff --git a/test/expected/basic.out b/test/expected/basic.out
index a793ef2..65be2a6 100644
--- a/test/expected/basic.out
+++ b/test/expected/basic.out
@@ -30,15 +30,20 @@ LINE 1: SELECT '{2147483648}'::int4hashset;
 /*
  * Hashset Functions
  */
-SELECT int4hashset(); -- init empty int4hashset with no capacity
+SELECT int4hashset();
  int4hashset 
 -------------
  {}
 (1 row)
 
-SELECT int4hashset_with_capacity(10); -- init empty int4hashset with specified capacity
- int4hashset_with_capacity 
----------------------------
+SELECT int4hashset(
+    capacity := 10,
+    load_factor := 0.9,
+    growth_factor := 1.1,
+    hashfn_id := 1
+);
+ int4hashset 
+-------------
  {}
 (1 row)
 
@@ -90,7 +95,7 @@ SELECT hashset_count('{1,2,3}'::int4hashset); -- 3
              3
 (1 row)
 
-SELECT hashset_capacity(int4hashset_with_capacity(10)); -- 10
+SELECT hashset_capacity(int4hashset(capacity := 10)); -- 10
  hashset_capacity 
 ------------------
                10
@@ -99,21 +104,21 @@ SELECT hashset_capacity(int4hashset_with_capacity(10)); -- 10
 /*
  * Aggregation Functions
  */
-SELECT hashset(i) FROM generate_series(1,10) AS i;
-        hashset         
+SELECT hashset_agg(i) FROM generate_series(1,10) AS i;
+      hashset_agg       
 ------------------------
- {8,1,10,3,9,4,6,2,5,7}
+ {6,10,1,8,2,3,4,5,9,7}
 (1 row)
 
-SELECT hashset(h) FROM
+SELECT hashset_agg(h) FROM
 (
-    SELECT hashset(i) AS h FROM generate_series(1,5) AS i
+    SELECT hashset_agg(i) AS h FROM generate_series(1,5) AS i
     UNION ALL
-    SELECT hashset(j) AS h FROM generate_series(6,10) AS j
+    SELECT hashset_agg(j) AS h FROM generate_series(6,10) AS j
 ) q;
-        hashset         
+      hashset_agg       
 ------------------------
- {8,1,10,3,9,4,6,2,5,7}
+ {6,8,1,3,2,10,4,5,9,7}
 (1 row)
 
 /*
diff --git a/test/expected/order.out b/test/expected/order.out
index 089bd15..2eb321c 100644
--- a/test/expected/order.out
+++ b/test/expected/order.out
@@ -14,7 +14,7 @@ DECLARE
   element INT;
   random_set int4hashset;
 BEGIN
-  random_set := int4hashset_with_capacity(num_elements);
+  random_set := int4hashset(capacity := num_elements);
 
   FOR i IN 1..num_elements LOOP
     element := floor(random() * 1000)::INT;
diff --git a/test/expected/reported_bugs.out b/test/expected/reported_bugs.out
index 226e81c..860370d 100644
--- a/test/expected/reported_bugs.out
+++ b/test/expected/reported_bugs.out
@@ -12,16 +12,16 @@
  * hashset, thereby preventing alteration of the original data.
  */
 SELECT
-    q.hashset,
-    hashset_add(hashset,4)
+    q.hashset_agg,
+    hashset_add(hashset_agg,4)
 FROM
 (
     SELECT
-        hashset(generate_series)
+        hashset_agg(generate_series)
     FROM generate_series(1,3)
 ) q;
- hashset | hashset_add 
----------+-------------
- {1,3,2} | {1,3,4,2}
+ hashset_agg | hashset_add 
+-------------+-------------
+ {3,1,2}     | {3,4,1,2}
 (1 row)
 
diff --git a/test/expected/table.out b/test/expected/table.out
index 3c020b6..9793a49 100644
--- a/test/expected/table.out
+++ b/test/expected/table.out
@@ -1,6 +1,6 @@
 CREATE TABLE users (
     user_id int PRIMARY KEY,
-    user_likes int4hashset DEFAULT int4hashset_with_capacity(2)
+    user_likes int4hashset DEFAULT int4hashset(capacity := 2)
 );
 INSERT INTO users (user_id) VALUES (1);
 UPDATE users SET user_likes = hashset_add(user_likes, 101) WHERE user_id = 1;
diff --git a/test/sql/basic.sql b/test/sql/basic.sql
index 563c626..4882895 100644
--- a/test/sql/basic.sql
+++ b/test/sql/basic.sql
@@ -12,8 +12,13 @@ SELECT '{2147483648}'::int4hashset; -- out of range
  * Hashset Functions
  */
 
-SELECT int4hashset(); -- init empty int4hashset with no capacity
-SELECT int4hashset_with_capacity(10); -- init empty int4hashset with specified capacity
+SELECT int4hashset();
+SELECT int4hashset(
+    capacity := 10,
+    load_factor := 0.9,
+    growth_factor := 1.1,
+    hashfn_id := 1
+);
 SELECT hashset_add(int4hashset(), 123);
 SELECT hashset_add(NULL::int4hashset, 123);
 SELECT hashset_add('{123}'::int4hashset, 456);
@@ -22,19 +27,19 @@ SELECT hashset_contains('{123,456}'::int4hashset, 789); -- false
 SELECT hashset_merge('{1,2}'::int4hashset, '{2,3}'::int4hashset);
 SELECT hashset_to_array('{1,2,3}'::int4hashset);
 SELECT hashset_count('{1,2,3}'::int4hashset); -- 3
-SELECT hashset_capacity(int4hashset_with_capacity(10)); -- 10
+SELECT hashset_capacity(int4hashset(capacity := 10)); -- 10
 
 /*
  * Aggregation Functions
  */
 
-SELECT hashset(i) FROM generate_series(1,10) AS i;
+SELECT hashset_agg(i) FROM generate_series(1,10) AS i;
 
-SELECT hashset(h) FROM
+SELECT hashset_agg(h) FROM
 (
-    SELECT hashset(i) AS h FROM generate_series(1,5) AS i
+    SELECT hashset_agg(i) AS h FROM generate_series(1,5) AS i
     UNION ALL
-    SELECT hashset(j) AS h FROM generate_series(6,10) AS j
+    SELECT hashset_agg(j) AS h FROM generate_series(6,10) AS j
 ) q;
 
 /*
diff --git a/test/sql/benchmark.sql b/test/sql/benchmark.sql
new file mode 100644
index 0000000..6f825dc
--- /dev/null
+++ b/test/sql/benchmark.sql
@@ -0,0 +1,110 @@
+DROP EXTENSION IF EXISTS hashset;
+CREATE EXTENSION hashset;
+
+\timing on
+
+\echo *** Elements in sequence 1..100000
+
+\echo - Testing default hash function (Jenkins/lookup3)
+
+DO
+$$
+DECLARE
+    h int4hashset;
+BEGIN
+    h := int4hashset(hashfn_id := 1);
+    FOR i IN 1..100000 LOOP
+        h := hashset_add(h, i);
+    END LOOP;
+    RAISE NOTICE 'hashset_count: %', hashset_count(h);
+    RAISE NOTICE 'hashset_capacity: %', hashset_capacity(h);
+    RAISE NOTICE 'hashset_collisions: %', hashset_collisions(h);
+END
+$$ LANGUAGE plpgsql;
+
+\echo - Testing Murmurhash32
+
+DO
+$$
+DECLARE
+    h int4hashset;
+BEGIN
+    h := int4hashset(hashfn_id := 2);
+    FOR i IN 1..100000 LOOP
+        h := hashset_add(h, i);
+    END LOOP;
+    RAISE NOTICE 'hashset_count: %', hashset_count(h);
+    RAISE NOTICE 'hashset_capacity: %', hashset_capacity(h);
+    RAISE NOTICE 'hashset_collisions: %', hashset_collisions(h);
+END
+$$ LANGUAGE plpgsql;
+
+\echo - Testing naive hash function
+
+DO
+$$
+DECLARE
+    h int4hashset;
+BEGIN
+    h := int4hashset(hashfn_id := 3);
+    FOR i IN 1..100000 LOOP
+        h := hashset_add(h, i);
+    END LOOP;
+    RAISE NOTICE 'hashset_count: %', hashset_count(h);
+    RAISE NOTICE 'hashset_capacity: %', hashset_capacity(h);
+    RAISE NOTICE 'hashset_collisions: %', hashset_collisions(h);
+END
+$$ LANGUAGE plpgsql;
+
+\echo *** Testing 100000 random ints
+
+\echo - Testing default hash function (Jenkins/lookup3)
+
+DO
+$$
+DECLARE
+    h int4hashset;
+BEGIN
+    h := int4hashset(hashfn_id := 1);
+    FOR i IN 1..100000 LOOP
+        h := hashset_add(h, (floor(4294967296 * random()) - 2147483648)::int);
+    END LOOP;
+    RAISE NOTICE 'hashset_count: %', hashset_count(h);
+    RAISE NOTICE 'hashset_capacity: %', hashset_capacity(h);
+    RAISE NOTICE 'hashset_collisions: %', hashset_collisions(h);
+END
+$$ LANGUAGE plpgsql;
+
+\echo - Testing Murmurhash32
+
+DO
+$$
+DECLARE
+    h int4hashset;
+BEGIN
+    h := int4hashset(hashfn_id := 2);
+    FOR i IN 1..100000 LOOP
+        h := hashset_add(h, (floor(4294967296 * random()) - 2147483648)::int);
+    END LOOP;
+    RAISE NOTICE 'hashset_count: %', hashset_count(h);
+    RAISE NOTICE 'hashset_capacity: %', hashset_capacity(h);
+    RAISE NOTICE 'hashset_collisions: %', hashset_collisions(h);
+END
+$$ LANGUAGE plpgsql;
+
+\echo - Testing naive hash function
+
+DO
+$$
+DECLARE
+    h int4hashset;
+BEGIN
+    h := int4hashset(hashfn_id := 3);
+    FOR i IN 1..100000 LOOP
+        h := hashset_add(h, (floor(4294967296 * random()) - 2147483648)::int);
+    END LOOP;
+    RAISE NOTICE 'hashset_count: %', hashset_count(h);
+    RAISE NOTICE 'hashset_capacity: %', hashset_capacity(h);
+    RAISE NOTICE 'hashset_collisions: %', hashset_collisions(h);
+END
+$$ LANGUAGE plpgsql;
diff --git a/test/sql/order.sql b/test/sql/order.sql
index 2dcdb39..ba6af17 100644
--- a/test/sql/order.sql
+++ b/test/sql/order.sql
@@ -10,7 +10,7 @@ DECLARE
   element INT;
   random_set int4hashset;
 BEGIN
-  random_set := int4hashset_with_capacity(num_elements);
+  random_set := int4hashset(capacity := num_elements);
 
   FOR i IN 1..num_elements LOOP
     element := floor(random() * 1000)::INT;
diff --git a/test/sql/reported_bugs.sql b/test/sql/reported_bugs.sql
index fcd0b9d..a47a6f0 100644
--- a/test/sql/reported_bugs.sql
+++ b/test/sql/reported_bugs.sql
@@ -12,11 +12,11 @@
  * hashset, thereby preventing alteration of the original data.
  */
 SELECT
-    q.hashset,
-    hashset_add(hashset,4)
+    q.hashset_agg,
+    hashset_add(hashset_agg,4)
 FROM
 (
     SELECT
-        hashset(generate_series)
+        hashset_agg(generate_series)
     FROM generate_series(1,3)
 ) q;
diff --git a/test/sql/table.sql b/test/sql/table.sql
index a63253f..0472352 100644
--- a/test/sql/table.sql
+++ b/test/sql/table.sql
@@ -1,6 +1,6 @@
 CREATE TABLE users (
     user_id int PRIMARY KEY,
-    user_likes int4hashset DEFAULT int4hashset_with_capacity(2)
+    user_likes int4hashset DEFAULT int4hashset(capacity := 2)
 );
 INSERT INTO users (user_id) VALUES (1);
 UPDATE users SET user_likes = hashset_add(user_likes, 101) WHERE user_id = 1;
