Hello Andres,

If not, do you think advisable to spend time improving the evaluator & variable stuff and possibly other places for an overall 15% gain?

Also, what would be the likelyhood of such optimization patch to pass?

I could do a limited variable management improvement patch, eventually, I have funny ideas to speedup the thing, some of which outlined above, some others even more terrible.

Attached is a quick PoC.

  sh> cat set.sql
  \set x 0
  \set y :x
  \set z :y
  \set w :z
  \set v :w
  \set x :v
  \set y :x
  \set z :y
  \set w :z
  \set v :w
  \set x :v
  \set y :x
  \set z :y
  \set w :z
  \set v :w
  \set x1 :x
  \set x2 :x
  \set y1 :z
  \set y0 :w
  \set helloworld :x

Before the patch:

  sh> ./pgbench -T 10 -f vars.sql
  ...
  tps = 802966.183240 (excluding connections establishing) # 0.8M

After the patch:

  sh> ./pgbench -T 10 -f vars.sql
  ...
  tps = 2665382.878271 (excluding connections establishing) # 2.6M

Which is a (somehow disappointing) * 3.3 speedup. The impact on the 3 complex expressions tests is not measurable, though.

Probably variable management should be reworked more deeply to cleanup the code.

Questions:
 - how likely is such a patch to pass? (IMHO not likely)
 - what is its impact to overall performance when actual queries
   are performed (IMHO very small).

--
Fabien.
diff --git a/src/bin/pgbench/Makefile b/src/bin/pgbench/Makefile
index 25abd0a875..39bba12c23 100644
--- a/src/bin/pgbench/Makefile
+++ b/src/bin/pgbench/Makefile
@@ -7,7 +7,7 @@ subdir = src/bin/pgbench
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = pgbench.o exprparse.o $(WIN32RES)
+OBJS = pgbench.o exprparse.o symbol_table.o $(WIN32RES)
 
 override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS)
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index 17c9ec32c5..e5a15ae136 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -212,6 +212,7 @@ make_variable(char *varname)
 
 	expr->etype = ENODE_VARIABLE;
 	expr->u.variable.varname = varname;
+	expr->u.variable.varnum = getOrCreateSymbolId(symbol_table, varname);
 	return expr;
 }
 
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 570cf3306a..14153ebc35 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -118,6 +118,7 @@ typedef int pthread_attr_t;
 
 static int	pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
 static int	pthread_join(pthread_t th, void **thread_return);
+// TODO: mutex?
 #elif defined(ENABLE_THREAD_SAFETY)
 /* Use platform-dependent pthread capability */
 #include <pthread.h>
@@ -232,7 +233,9 @@ const char *progname;
 volatile bool timer_exceeded = false;	/* flag from signal handler */
 
 /*
- * Variable definitions.
+ * Variable contents.
+ *
+ * Variable names are managed in symbol_table with a number.
  *
  * If a variable only has a string value, "svalue" is that value, and value is
  * "not set".  If the value is known, "value" contains the value (in any
@@ -243,11 +246,14 @@ volatile bool timer_exceeded = false;	/* flag from signal handler */
  */
 typedef struct
 {
-	char	   *name;			/* variable's name */
-	char	   *svalue;			/* its value in string form, if known */
-	PgBenchValue value;			/* actual variable's value */
+	int			number;	/* variable symbol number */
+	char		   *svalue;	/* its value in string form, if known */
+	PgBenchValue	value;		/* actual variable's value */
 } Variable;
 
+#define undefined_variable_value(v)						\
+	v.svalue == NULL && v.value.type == PGBT_NO_VALUE
+
 #define MAX_SCRIPTS		128		/* max number of SQL scripts allowed */
 #define SHELL_COMMAND_SIZE	256 /* maximum size allowed for shell command */
 
@@ -395,7 +401,6 @@ typedef struct
 	/* client variables */
 	Variable   *variables;		/* array of variable definitions */
 	int			nvariables;		/* number of variables */
-	bool		vars_sorted;	/* are variables sorted by name? */
 
 	/* various times about current transaction */
 	int64		txn_scheduled;	/* scheduled start time of transaction (usec) */
@@ -493,6 +498,7 @@ static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
  * varprefix 	SQL commands terminated with \gset have this set
  *				to a non NULL value.  If nonempty, it's used to prefix the
  *				variable name that receives the value.
+ * set_varnum	variable symbol number for \set and \setshell.
  * expr			Parsed expression, if needed.
  * stats		Time spent in this command.
  */
@@ -505,6 +511,7 @@ typedef struct Command
 	int			argc;
 	char	   *argv[MAX_ARGS];
 	char	   *varprefix;
+	int			set_varnum;
 	PgBenchExpr *expr;
 	SimpleStats stats;
 } Command;
@@ -523,6 +530,10 @@ static int64 total_weight = 0;
 
 static int	debug = 0;			/* debug flag */
 
+/* also used by exprparse.y */
+#define INITIAL_SYMBOL_TABLE_SIZE 128
+SymbolTable symbol_table = NULL;
+
 /* Builtin test scripts */
 typedef struct BuiltinScript
 {
@@ -1215,39 +1226,16 @@ doConnect(void)
 	return conn;
 }
 
-/* qsort comparator for Variable array */
-static int
-compareVariableNames(const void *v1, const void *v2)
-{
-	return strcmp(((const Variable *) v1)->name,
-				  ((const Variable *) v2)->name);
-}
-
 /* Locate a variable by name; returns NULL if unknown */
 static Variable *
 lookupVariable(CState *st, char *name)
 {
-	Variable	key;
+	int		num = getSymbolId(symbol_table, name, true);
 
-	/* On some versions of Solaris, bsearch of zero items dumps core */
-	if (st->nvariables <= 0)
+	if (0 <= num && num < st->nvariables)
+		return & st->variables[num];
+	else
 		return NULL;
-
-	/* Sort if we have to */
-	if (!st->vars_sorted)
-	{
-		qsort((void *) st->variables, st->nvariables, sizeof(Variable),
-			  compareVariableNames);
-		st->vars_sorted = true;
-	}
-
-	/* Now we can search */
-	key.name = name;
-	return (Variable *) bsearch((void *) &key,
-								(void *) st->variables,
-								st->nvariables,
-								sizeof(Variable),
-								compareVariableNames);
 }
 
 /* Get the value of a variable, in string form; returns NULL if unknown */
@@ -1292,17 +1280,26 @@ makeVariableValue(Variable *var)
 	if (var->value.type != PGBT_NO_VALUE)
 		return true;			/* no work */
 
+	if (var->svalue == NULL || var->svalue[0] == '\0')
+		return false;
+
 	slen = strlen(var->svalue);
 
-	if (slen == 0)
-		/* what should it do on ""? */
-		return false;
+	/* try most probable first */
+	if (is_an_int(var->svalue))
+	{
+		/* if it looks like an int, it must be an int without overflow */
+		int64		iv;
 
-	if (pg_strcasecmp(var->svalue, "null") == 0)
+		if (!strtoint64(var->svalue, false, &iv))
+			return false;
+
+		setIntValue(&var->value, iv);
+	}
+	else if (pg_strcasecmp(var->svalue, "null") == 0)
 	{
 		setNullValue(&var->value);
 	}
-
 	/*
 	 * accept prefixes such as y, ye, n, no... but not for "o". 0/1 are
 	 * recognized later as an int, which is converted to bool if needed.
@@ -1320,16 +1317,6 @@ makeVariableValue(Variable *var)
 	{
 		setBoolValue(&var->value, false);
 	}
-	else if (is_an_int(var->svalue))
-	{
-		/* if it looks like an int, it must be an int without overflow */
-		int64		iv;
-
-		if (!strtoint64(var->svalue, false, &iv))
-			return false;
-
-		setIntValue(&var->value, iv);
-	}
 	else						/* type should be double */
 	{
 		double		dv;
@@ -1338,7 +1325,8 @@ makeVariableValue(Variable *var)
 		{
 			fprintf(stderr,
 					"malformed variable \"%s\" value: \"%s\"\n",
-					var->name, var->svalue);
+					var->number >= 0 ? getSymbolName(symbol_table, var->number) : "<null>",
+					var->svalue);
 			return false;
 		}
 		setDoubleValue(&var->value, dv);
@@ -1380,66 +1368,38 @@ valid_variable_name(const char *name)
 	return true;
 }
 
-/*
- * Lookup a variable by name, creating it if need be.
- * Caller is expected to assign a value to the variable.
- * Returns NULL on failure (bad name).
+/* Assign a string value to a variable, creating it if need be.
+ *
+ * Returns false on failure (bad name)
  */
-static Variable *
-lookupCreateVariable(CState *st, const char *context, char *name)
-{
-	Variable   *var;
-
-	var = lookupVariable(st, name);
-	if (var == NULL)
-	{
-		Variable   *newvars;
-
-		/*
-		 * Check for the name only when declaring a new variable to avoid
-		 * overhead.
-		 */
-		if (!valid_variable_name(name))
-		{
-			fprintf(stderr, "%s: invalid variable name: \"%s\"\n",
-					context, name);
-			return NULL;
-		}
-
-		/* Create variable at the end of the array */
-		if (st->variables)
-			newvars = (Variable *) pg_realloc(st->variables,
-											  (st->nvariables + 1) * sizeof(Variable));
-		else
-			newvars = (Variable *) pg_malloc(sizeof(Variable));
-
-		st->variables = newvars;
-
-		var = &newvars[st->nvariables];
-
-		var->name = pg_strdup(name);
-		var->svalue = NULL;
-		/* caller is expected to initialize remaining fields */
-
-		st->nvariables++;
-		/* we don't re-sort the array till we have to */
-		st->vars_sorted = false;
-	}
-
-	return var;
-}
-
-/* Assign a string value to a variable, creating it if need be */
-/* Returns false on failure (bad name) */
 static bool
 putVariable(CState *st, const char *context, char *name, const char *value)
 {
+	int		num = getOrCreateSymbolId(symbol_table, name);
 	Variable   *var;
 	char	   *val;
 
-	var = lookupCreateVariable(st, context, name);
-	if (!var)
-		return false;
+	if (num < 0 || st->nvariables <= num)
+	{
+		fprintf(stderr, "%s: too many variables while adding \"%s\"\n",
+				context, name);
+		abort();
+	}
+
+	var = & st->variables[num];
+
+	if (var->number == -1)
+	{
+		/* first time the variable is encountered, do some checks */
+		if (!valid_variable_name(name))
+		{
+			fprintf(stderr, "%s: invalid variable name \"%s\"\n",
+					context, name);
+			return false;
+		}
+
+		var->number = num;
+	}
 
 	/* dup then free, in case value is pointing at this variable */
 	val = pg_strdup(value);
@@ -1452,35 +1412,62 @@ putVariable(CState *st, const char *context, char *name, const char *value)
 	return true;
 }
 
-/* Assign a value to a variable, creating it if need be */
-/* Returns false on failure (bad name) */
+/* shortcut version if variable number is known */
 static bool
-putVariableValue(CState *st, const char *context, char *name,
-				 const PgBenchValue *value)
+putSymbolValue(CState *st, const char *context, int var_num,
+			   const PgBenchValue *value)
 {
-	Variable   *var;
+	Variable	*var;
 
-	var = lookupCreateVariable(st, context, name);
-	if (!var)
+	if (var_num < 0 || var_num >= st->nvariables)
+	{
+		fprintf(stderr, "%s: unexpected variable number %d\n",
+				context, var_num);
 		return false;
+	}
+
+	var = & st->variables[var_num];
+
+	if (unlikely(var->number == -1))
+		var->number = var_num;
 
 	if (var->svalue)
+	{
 		free(var->svalue);
-	var->svalue = NULL;
+		var->svalue = NULL;
+	}
 	var->value = *value;
 
 	return true;
 }
 
-/* Assign an integer value to a variable, creating it if need be */
-/* Returns false on failure (bad name) */
+/* shortcut version with variable number */
 static bool
-putVariableInt(CState *st, const char *context, char *name, int64 value)
+putSymbolInt(CState *st, const char *context, int var_num, int64 value)
 {
-	PgBenchValue val;
+	Variable	*var;
 
-	setIntValue(&val, value);
-	return putVariableValue(st, context, name, &val);
+	if (var_num < 0 || var_num >= st->nvariables)
+	{
+		fprintf(stderr, "%s: unexpected variable number %d\n",
+				context, var_num);
+		return false;
+	}
+
+	var = & st->variables[var_num];
+
+	if (unlikely(var->number == -1))
+		var->number = var_num;
+
+	if (var->svalue)
+	{
+		free(var->svalue);
+		var->svalue = NULL;
+	}
+
+	setIntValue(&var->value, value);
+
+	return true;
 }
 
 /*
@@ -2423,16 +2410,18 @@ evaluateExpr(CState *st, PgBenchExpr *expr, PgBenchValue *retval)
 		case ENODE_VARIABLE:
 			{
 				Variable   *var;
+				int			num = expr->u.variable.varnum;
 
-				if ((var = lookupVariable(st, expr->u.variable.varname)) == NULL)
-				{
-					fprintf(stderr, "undefined variable \"%s\"\n",
-							expr->u.variable.varname);
-					return false;
-				}
+				Assert(0 <= num && num < st->nvariables);
+
+				var = & st->variables[num];
 
 				if (!makeVariableValue(var))
+				{
+					fprintf(stderr, "no value for variable \"%s\"\n",
+							expr->u.variable.varname);
 					return false;
+				}
 
 				*retval = var->value;
 				return true;
@@ -2490,7 +2479,7 @@ getMetaCommand(const char *cmd)
  * Return true if succeeded, or false on error.
  */
 static bool
-runShellCommand(CState *st, char *variable, char **argv, int argc)
+runShellCommand(CState *st, int var_num, char **argv, int argc)
 {
 	char		command[SHELL_COMMAND_SIZE];
 	int			i,
@@ -2544,7 +2533,7 @@ runShellCommand(CState *st, char *variable, char **argv, int argc)
 	command[len] = '\0';
 
 	/* Fast path for non-assignment case */
-	if (variable == NULL)
+	if (var_num == -1)
 	{
 		if (system(command))
 		{
@@ -2584,7 +2573,8 @@ runShellCommand(CState *st, char *variable, char **argv, int argc)
 				argv[0], res);
 		return false;
 	}
-	if (!putVariableInt(st, "setshell", variable, retval))
+
+	if (var_num != -1 && !putSymbolInt(st, "setshell", var_num, retval))
 		return false;
 
 #ifdef DEBUG
@@ -3344,7 +3334,7 @@ executeMetaCommand(CState *st, instr_time *now)
 			return CSTATE_ABORTED;
 		}
 
-		if (!putVariableValue(st, argv[0], argv[1], &result))
+		if (!putSymbolValue(st, argv[0], command->set_varnum, &result))
 		{
 			commandFailed(st, "set", "assignment of meta-command failed");
 			return CSTATE_ABORTED;
@@ -3414,7 +3404,7 @@ executeMetaCommand(CState *st, instr_time *now)
 	}
 	else if (command->meta == META_SETSHELL)
 	{
-		if (!runShellCommand(st, argv[1], argv + 2, argc - 2))
+		if (!runShellCommand(st, command->set_varnum, argv + 2, argc - 2))
 		{
 			commandFailed(st, "setshell", "execution of meta-command failed");
 			return CSTATE_ABORTED;
@@ -3422,7 +3412,7 @@ executeMetaCommand(CState *st, instr_time *now)
 	}
 	else if (command->meta == META_SHELL)
 	{
-		if (!runShellCommand(st, NULL, argv + 1, argc - 1))
+		if (!runShellCommand(st, -1, argv + 1, argc - 1))
 		{
 			commandFailed(st, "shell", "execution of meta-command failed");
 			return CSTATE_ABORTED;
@@ -4280,6 +4270,12 @@ process_backslash_command(PsqlScanState sstate, const char *source)
 			offsets[j] = word_offset;
 			my_command->argv[j++] = pg_strdup(word_buf.data);
 			my_command->argc++;
+
+			if (!valid_variable_name(word_buf.data))
+				syntax_error(source, lineno, my_command->first_line, my_command->argv[0],
+							 "invalid variable name", word_buf.data, -1);
+
+			my_command->set_varnum = getOrCreateSymbolId(symbol_table, word_buf.data);
 		}
 
 		/* then for all parse the expression */
@@ -4377,6 +4373,12 @@ process_backslash_command(PsqlScanState sstate, const char *source)
 		if (my_command->argc < 3)
 			syntax_error(source, lineno, my_command->first_line, my_command->argv[0],
 						 "missing argument", NULL, -1);
+
+		if (!valid_variable_name(my_command->argv[1]))
+			syntax_error(source, lineno, my_command->first_line, my_command->argv[0],
+						 "invalid variable name", my_command->argv[1], -1);
+
+		my_command->set_varnum = getOrCreateSymbolId(symbol_table, my_command->argv[1]);
 	}
 	else if (my_command->meta == META_SHELL)
 	{
@@ -5154,6 +5156,8 @@ main(int argc, char **argv)
 
 	int			i;
 	int			nclients_dealt;
+	int			scale_symbol, client_id_symbol,
+					random_seed_symbol, default_seed_symbol;
 
 #ifdef HAVE_GETRLIMIT
 	struct rlimit rlim;
@@ -5190,6 +5194,21 @@ main(int argc, char **argv)
 		login = env;
 
 	state = (CState *) pg_malloc0(sizeof(CState));
+	state->nvariables = INITIAL_SYMBOL_TABLE_SIZE;
+	state->variables = pg_malloc(sizeof(Variable) * state->nvariables);
+	for (int i = 0; i < state->nvariables; i++)
+	{
+		state->variables[i].number = -1;
+		state->variables[i].svalue = NULL;
+		state->variables[i].value.type = PGBT_NO_VALUE;
+	}
+
+	/* symbol table */
+	symbol_table = newSymbolTable();
+	scale_symbol = getOrCreateSymbolId(symbol_table, "scale");
+	client_id_symbol = getOrCreateSymbolId(symbol_table, "client_id");
+	random_seed_symbol = getOrCreateSymbolId(symbol_table, "random_seed");
+	default_seed_symbol = getOrCreateSymbolId(symbol_table, "default_seed");
 
 	/* set random seed early, because it may be used while parsing scripts. */
 	if (!set_random_seed(getenv("PGBENCH_RANDOM_SEED")))
@@ -5649,6 +5668,8 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
+	// dumpSymbolTable(stderr, symbol_table);
+
 	/*
 	 * save main process id in the global variable because process id will be
 	 * changed after fork.
@@ -5657,32 +5678,28 @@ main(int argc, char **argv)
 
 	if (nclients > 1)
 	{
+		int			nvars = numberOfSymbols(symbol_table) + INITIAL_SYMBOL_TABLE_SIZE;
+
 		state = (CState *) pg_realloc(state, sizeof(CState) * nclients);
 		memset(state + 1, 0, sizeof(CState) * (nclients - 1));
 
+		state[0].variables = pg_realloc(state[0].variables, sizeof(Variable) * nvars);
+		state[0].nvariables = nvars;
+
+		for (i = INITIAL_SYMBOL_TABLE_SIZE; i < nvars; i++)
+		{
+			state[0].variables[i].number = -1;
+			state[0].variables[i].svalue = NULL;
+			state[0].variables[i].value.type = PGBT_NO_VALUE;
+		}
+
 		/* copy any -D switch values to all clients */
 		for (i = 1; i < nclients; i++)
 		{
-			int			j;
-
 			state[i].id = i;
-			for (j = 0; j < state[0].nvariables; j++)
-			{
-				Variable   *var = &state[0].variables[j];
-
-				if (var->value.type != PGBT_NO_VALUE)
-				{
-					if (!putVariableValue(&state[i], "startup",
-										  var->name, &var->value))
-						exit(1);
-				}
-				else
-				{
-					if (!putVariable(&state[i], "startup",
-									 var->name, var->svalue))
-						exit(1);
-				}
-			}
+			state[i].variables = pg_malloc(sizeof(Variable) * nvars);
+			state[i].nvariables = nvars;
+			memcpy(state[i].variables, state[0].variables, sizeof(Variable) * nvars);
 		}
 	}
 
@@ -5754,43 +5771,41 @@ main(int argc, char **argv)
 	 * :scale variables normally get -s or database scale, but don't override
 	 * an explicit -D switch
 	 */
-	if (lookupVariable(&state[0], "scale") == NULL)
+	if (undefined_variable_value(state[0].variables[scale_symbol]))
 	{
 		for (i = 0; i < nclients; i++)
-		{
-			if (!putVariableInt(&state[i], "startup", "scale", scale))
+			if (!putSymbolInt(&state[i], "startup", scale_symbol, scale))
 				exit(1);
-		}
 	}
 
 	/*
 	 * Define a :client_id variable that is unique per connection. But don't
 	 * override an explicit -D switch.
 	 */
-	if (lookupVariable(&state[0], "client_id") == NULL)
+	if (undefined_variable_value(state[0].variables[client_id_symbol]))
 	{
 		for (i = 0; i < nclients; i++)
-			if (!putVariableInt(&state[i], "startup", "client_id", i))
+			if (!putSymbolInt(&state[i], "startup", client_id_symbol, i))
 				exit(1);
 	}
 
 	/* set default seed for hash functions */
-	if (lookupVariable(&state[0], "default_seed") == NULL)
+	if (undefined_variable_value(state[0].variables[default_seed_symbol]))
 	{
 		uint64		seed =
 		((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) |
 		(((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) << 32);
 
 		for (i = 0; i < nclients; i++)
-			if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
+			if (!putSymbolInt(&state[i], "startup", default_seed_symbol, (int64) seed))
 				exit(1);
 	}
 
 	/* set random seed unless overwritten */
-	if (lookupVariable(&state[0], "random_seed") == NULL)
+	if (undefined_variable_value(state[0].variables[random_seed_symbol]))
 	{
 		for (i = 0; i < nclients; i++)
-			if (!putVariableInt(&state[i], "startup", "random_seed", random_seed))
+			if (!putSymbolInt(&state[i], "startup", random_seed_symbol, random_seed))
 				exit(1);
 	}
 
@@ -5930,6 +5945,8 @@ main(int argc, char **argv)
 	if (exit_code != 0)
 		fprintf(stderr, "Run was aborted; the above results are incomplete.\n");
 
+	freeSymbolTable(symbol_table);
+
 	return exit_code;
 }
 
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index c4a1e298e0..243f59a8ee 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -115,6 +115,7 @@ struct PgBenchExpr
 		struct
 		{
 			char	   *varname;
+			int			varnum;
 		}			variable;
 		struct
 		{
@@ -163,4 +164,21 @@ extern void syntax_error(const char *source, int lineno, const char *line,
 extern bool strtoint64(const char *str, bool errorOK, int64 *pi);
 extern bool strtodouble(const char *str, bool errorOK, double *pd);
 
+/*
+ * A symbol table is an opaque data structure
+ */
+struct SymbolTable_st;
+typedef struct SymbolTable_st *SymbolTable;
+
+SymbolTable newSymbolTable(void);
+void freeSymbolTable(SymbolTable st);
+void dumpSymbolTable(FILE *out, SymbolTable st);
+int numberOfSymbols(SymbolTable st);
+int getOrCreateSymbolId(SymbolTable st, const char *name);
+int getSymbolId(SymbolTable st, const char *name, bool check);
+char *getSymbolName(SymbolTable st, int number);
+int rmSymbolId(SymbolTable st, const char *name);
+
+extern SymbolTable symbol_table;
+
 #endif							/* PGBENCH_H */
diff --git a/src/bin/pgbench/symbol_table.c b/src/bin/pgbench/symbol_table.c
new file mode 100644
index 0000000000..e9229ea74b
--- /dev/null
+++ b/src/bin/pgbench/symbol_table.c
@@ -0,0 +1,317 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(ENABLE_THREAD_SAFETY)
+#include <pthread.h>
+#else
+#define pthread_mutex_t void *
+#define pthread_mutex_init(m, p) 0
+#define pthread_mutex_lock(m)
+#define pthread_mutex_unlock(m)
+#define pthread_mutex_destroy(m)
+#endif
+
+typedef enum { false, true } bool;
+
+struct SymbolTable_st;
+typedef struct SymbolTable_st *SymbolTable;
+SymbolTable newSymbolTable(void);
+void freeSymbolTable(SymbolTable st);
+void dumpSymbolTable(FILE *out, SymbolTable st);
+int numberOfSymbols(SymbolTable st);
+int getOrCreateSymbolId(SymbolTable st, const char *name);
+int getSymbolId(SymbolTable st, const char *name, bool check);
+char *getSymbolName(SymbolTable st, int number);
+int rmSymbolId(SymbolTable st, const char *name);
+
+/*----
+ *
+ * simplistic SymbolTable management.
+ *
+ * Implement a string -> int table based on a tree on bytes.
+ * The data structure makes more sense if many searches are expected.
+ *
+ * Each '\0'-terminated string is store in the tree at the first non-unique
+ * prefix, possibly including the final '\0' if the string is a prefix of
+ * another.
+ *
+ * Storage requirement n * len(s).
+ *
+ * Storing a symbol costs len(s).
+ *
+ * Finding a symbol costs len(s) if checked, len(unique_prefix(s)) if unchecked.
+ */
+
+typedef struct Symbol
+{
+	char   *name;
+	int		number;
+} Symbol;
+
+typedef struct SymbolTableTree
+{
+	bool				is_leaf;
+	union {
+		Symbol						symb;	/* is_leaf */
+		struct SymbolTableTree	   *tree;	/* ! ~ */
+	} u;
+} SymbolTableTree;
+
+struct SymbolTable_st
+{
+	int					nsymbols;		/* next symbol number */
+	int					stored;			/* current #symbols in st */
+	SymbolTableTree	   *root;			/* symbol tree root */
+	int					symbols_size;	/* size of following array */
+	Symbol			  **symbols;		/* array of existing symbols */
+	pthread_mutex_t		lock;
+};
+
+/* allocate a branching node of empty leaves */
+static SymbolTableTree * newSymbolTableTree(void)
+{
+	SymbolTableTree * tree = malloc(sizeof(SymbolTableTree) * 256);
+
+	for (int i = 0; i < 256; i++)
+	{
+		tree[i].is_leaf = true;
+		tree[i].u.symb.name = NULL;
+		tree[i].u.symb.number = -1;
+	}
+
+	return tree;
+}
+
+/* allocate a symbol table */
+SymbolTable newSymbolTable(void)
+{
+	SymbolTable st = malloc(sizeof(struct SymbolTable_st));
+
+	st->nsymbols = 0;
+	st->stored = 0;
+	st->root = malloc(sizeof(SymbolTableTree));
+	st->root->is_leaf = true;
+	st->root->u.symb.name = NULL;
+	st->root->u.symb.number = -1;
+	st->symbols_size = 16;
+	st->symbols = malloc(sizeof(Symbol *) * st->symbols_size);
+	if (pthread_mutex_init(&st->lock, NULL) != 0)
+		abort();
+
+	return st;
+}
+
+/* recursively free a symbol table tree */
+static void freeSymbolTableTree(SymbolTableTree * tree)
+{
+	if (tree->is_leaf)
+	{
+		if (tree->u.symb.name != NULL)
+			free(tree->u.symb.name);
+	}
+	else
+	{
+		for (int i = 0; i < 256; i++)
+			freeSymbolTableTree(& tree->u.tree[i]);
+		free(tree->u.tree);
+	}
+}
+
+/* free symbol table st */
+void freeSymbolTable(SymbolTable st)
+{
+	freeSymbolTableTree(st->root);
+	free(st->root);
+	free(st->symbols);
+	pthread_mutex_destroy(&st->lock);
+	free(st);
+}
+
+/* how many symbols have been seen */
+int numberOfSymbols(SymbolTable st)
+{
+	return st->nsymbols;
+}
+
+/* dump tree to out, in byte order */
+static void dumpSymbolTableTree(FILE * out, SymbolTableTree * tree)
+{
+	if (tree == NULL)
+		fprintf(out, "(null)");
+	else if (tree->is_leaf)
+	{
+		if (tree->u.symb.name != NULL || tree->u.symb.number != -1)
+			fprintf(out, "%s -> %d\n", tree->u.symb.name, tree->u.symb.number);
+	}
+	else
+		for (int i = 0; i < 256; i++)
+			dumpSymbolTableTree(out, & tree->u.tree[i]);
+}
+
+void dumpSymbolTable(FILE * out, SymbolTable st)
+{
+	fprintf(out, "SymbolTable dump: %d symbols (%d seen)\n",
+			st->stored, st->nsymbols);
+	dumpSymbolTableTree(out, st->root);
+}
+
+/* add new symbol to a node. NOT thread-safe. */
+static int addSymbol(SymbolTable st, SymbolTableTree *node, const char *name)
+{
+	int		number;
+
+	number = st->nsymbols++;
+	node->u.symb.name = strdup(name);
+	node->u.symb.number = number;
+
+	if (number >= st->symbols_size)
+	{
+		st->symbols_size *= 2;
+		st->symbols = realloc(st->symbols, sizeof(Symbol *) * st->symbols_size);
+	}
+
+	st->symbols[number] = & node->u.symb;
+	st->stored++;
+
+	return number;
+}
+
+/* get existing id or create a new one for name in st */
+int getOrCreateSymbolId(SymbolTable st, const char *name)
+{
+	SymbolTableTree	*node = st->root;
+	int i = 0;
+
+	/* get down to a leaf */
+	while (!node->is_leaf)
+		node = & node->u.tree[(unsigned char) name[i++]];
+
+	if (node->u.symb.name == NULL)
+	{
+		int		number;
+
+		/* empty leaf, let us use it */
+		pthread_mutex_lock(&st->lock);
+		number = addSymbol(st, node, name);
+		pthread_mutex_unlock(&st->lock);
+
+		return number;
+	}
+	else if ((i > 0 && strcmp(node->u.symb.name + i - 1, name + i - 1) == 0) ||
+			 strcmp(node->u.symb.name, name) == 0)
+		/* already occupied by same name */
+		return node->u.symb.number;
+	else /* it contains another name */
+	{
+		Symbol	prev;
+		int		number;
+
+		/* should lock be acquire before? or redo the descent? */
+		pthread_mutex_lock(&st->lock);
+		prev = node->u.symb;
+
+		/* expand tree from current node */
+		do
+		{
+			node->is_leaf = false;
+			node->u.tree = newSymbolTableTree();
+			node->u.tree[(unsigned char) prev.name[i]].u.symb = prev;
+			node = & node->u.tree[(unsigned char) name[i++]];
+		} while (node->u.symb.name != NULL);
+
+		number = addSymbol(st, node, name);
+
+		pthread_mutex_unlock(&st->lock);
+
+		return number;
+	}
+}
+
+/* return symbol number that exists or -1 */
+int getSymbolId(SymbolTable st, const char * name, bool check)
+{
+	SymbolTableTree	   *node = st->root;
+	int					i = 0;
+
+	while (!node->is_leaf)
+		node = & node->u.tree[(unsigned char) name[i++]];
+
+	// - 1 if tree included final '\0' character
+	if (node->u.symb.name != NULL &&
+		(!check ||
+		 (i>0 && strcmp(node->u.symb.name + i - 1, name + i - 1) == 0) ||
+		 strcmp(node->u.symb.name, name) == 0))
+		return node->u.symb.number;
+	else
+		return -1;
+}
+
+/* return symbol name if exists, or NULL */
+char *getSymbolName(SymbolTable st, int number)
+{
+	return 0 <= number && number < st->nsymbols ? st->symbols[number]->name : NULL;
+}
+
+/* remove name from st, return its number or -1 if not found */
+int rmSymbolId(SymbolTable st, const char * name)
+{
+	SymbolTableTree	   *node;
+	int					i = 0,
+						num;
+
+	pthread_mutex_lock(&st->lock);
+
+	node = st->root;
+	while (!node->is_leaf)
+		node = & node->u.tree[(unsigned char) name[i++]];
+
+	if (node->u.symb.name != NULL && strcmp(node->u.symb.name, name) == 0)
+	{
+		num = node->u.symb.number;
+
+		free(node->u.symb.name);
+		node->u.symb.name = NULL;
+		node->u.symb.number = -1;
+		st->stored--;
+		st->symbols[num] = NULL;
+	}
+	else
+		num = -1;
+
+	pthread_mutex_unlock(&st->lock);
+
+	return num;
+}
+
+#ifdef WITH_MAIN
+int main(void)
+{
+	char *names[27] = {
+		"calvin", "hobbes", "calvin", "susie", "moe", "rosalyn",
+		"v", "va", "var", "var1", "var2", "var3",
+		"var11", "var1", "var11", "hobbes", "moe", "v", "",
+		"é", "été", "ét", "étage",
+		"你好!", "你好", "你",
+		NULL
+	};
+
+	SymbolTable st = newSymbolTable();
+
+	for (int i = 0; names[i] != NULL; i++)
+	{
+		fprintf(stdout, "# %s (%ld)\n", names[i], strlen(names[i]));
+		int prev = getSymbolId(st, names[i], true);
+		int nc = getSymbolId(st, names[i], false);
+		int add = getOrCreateSymbolId(st, names[i]);
+		int post = getSymbolId(st, names[i], false);
+		int rm = rmSymbolId(st, "moe");
+		fprintf(stdout, "%d(%d) %d %d %d\n", prev, nc, add, post, rm);
+	}
+
+	dumpSymbolTable(stderr, st);
+	freeSymbolTable(st);
+
+	return 0;
+}
+#endif
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..493f9ec883 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -622,14 +622,14 @@ SELECT LEAST(} . join(', ', (':i') x 256) . q{)}
 		[qr{unexpected function name}], q{\set i noSuchFunction()}
 	],
 	[
-		'set invalid variable name', 2,
+		'set invalid variable name', 1,
 		[qr{invalid variable name}], q{\set . 1}
 	],
 	[ 'set division by zero', 2, [qr{division by zero}], q{\set i 1/0} ],
 	[
 		'set undefined variable',
 		2,
-		[qr{undefined variable "nosuchvariable"}],
+		[qr{no value for variable "nosuchvariable"}],
 		q{\set i :nosuchvariable}
 	],
 	[ 'set unexpected char', 1, [qr{unexpected character .;.}], q{\set i ;} ],

Reply via email to