Hello Andres,
If not, do you think advisable to spend time improving the evaluator &
variable stuff and possibly other places for an overall 15% gain?
Also, what would be the likelyhood of such optimization patch to pass?
I could do a limited variable management improvement patch, eventually, I
have funny ideas to speedup the thing, some of which outlined above, some
others even more terrible.
Attached is a quick PoC.
sh> cat set.sql
\set x 0
\set y :x
\set z :y
\set w :z
\set v :w
\set x :v
\set y :x
\set z :y
\set w :z
\set v :w
\set x :v
\set y :x
\set z :y
\set w :z
\set v :w
\set x1 :x
\set x2 :x
\set y1 :z
\set y0 :w
\set helloworld :x
Before the patch:
sh> ./pgbench -T 10 -f vars.sql
...
tps = 802966.183240 (excluding connections establishing) # 0.8M
After the patch:
sh> ./pgbench -T 10 -f vars.sql
...
tps = 2665382.878271 (excluding connections establishing) # 2.6M
Which is a (somehow disappointing) * 3.3 speedup. The impact on the 3
complex expressions tests is not measurable, though.
Probably variable management should be reworked more deeply to cleanup the
code.
Questions:
- how likely is such a patch to pass? (IMHO not likely)
- what is its impact to overall performance when actual queries
are performed (IMHO very small).
--
Fabien.
diff --git a/src/bin/pgbench/Makefile b/src/bin/pgbench/Makefile
index 25abd0a875..39bba12c23 100644
--- a/src/bin/pgbench/Makefile
+++ b/src/bin/pgbench/Makefile
@@ -7,7 +7,7 @@ subdir = src/bin/pgbench
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = pgbench.o exprparse.o $(WIN32RES)
+OBJS = pgbench.o exprparse.o symbol_table.o $(WIN32RES)
override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS)
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index 17c9ec32c5..e5a15ae136 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -212,6 +212,7 @@ make_variable(char *varname)
expr->etype = ENODE_VARIABLE;
expr->u.variable.varname = varname;
+ expr->u.variable.varnum = getOrCreateSymbolId(symbol_table, varname);
return expr;
}
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 570cf3306a..14153ebc35 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -118,6 +118,7 @@ typedef int pthread_attr_t;
static int pthread_create(pthread_t *thread, pthread_attr_t *attr, void *(*start_routine) (void *), void *arg);
static int pthread_join(pthread_t th, void **thread_return);
+// TODO: mutex?
#elif defined(ENABLE_THREAD_SAFETY)
/* Use platform-dependent pthread capability */
#include <pthread.h>
@@ -232,7 +233,9 @@ const char *progname;
volatile bool timer_exceeded = false; /* flag from signal handler */
/*
- * Variable definitions.
+ * Variable contents.
+ *
+ * Variable names are managed in symbol_table with a number.
*
* If a variable only has a string value, "svalue" is that value, and value is
* "not set". If the value is known, "value" contains the value (in any
@@ -243,11 +246,14 @@ volatile bool timer_exceeded = false; /* flag from signal handler */
*/
typedef struct
{
- char *name; /* variable's name */
- char *svalue; /* its value in string form, if known */
- PgBenchValue value; /* actual variable's value */
+ int number; /* variable symbol number */
+ char *svalue; /* its value in string form, if known */
+ PgBenchValue value; /* actual variable's value */
} Variable;
+#define undefined_variable_value(v) \
+ v.svalue == NULL && v.value.type == PGBT_NO_VALUE
+
#define MAX_SCRIPTS 128 /* max number of SQL scripts allowed */
#define SHELL_COMMAND_SIZE 256 /* maximum size allowed for shell command */
@@ -395,7 +401,6 @@ typedef struct
/* client variables */
Variable *variables; /* array of variable definitions */
int nvariables; /* number of variables */
- bool vars_sorted; /* are variables sorted by name? */
/* various times about current transaction */
int64 txn_scheduled; /* scheduled start time of transaction (usec) */
@@ -493,6 +498,7 @@ static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
* varprefix SQL commands terminated with \gset have this set
* to a non NULL value. If nonempty, it's used to prefix the
* variable name that receives the value.
+ * set_varnum variable symbol number for \set and \setshell.
* expr Parsed expression, if needed.
* stats Time spent in this command.
*/
@@ -505,6 +511,7 @@ typedef struct Command
int argc;
char *argv[MAX_ARGS];
char *varprefix;
+ int set_varnum;
PgBenchExpr *expr;
SimpleStats stats;
} Command;
@@ -523,6 +530,10 @@ static int64 total_weight = 0;
static int debug = 0; /* debug flag */
+/* also used by exprparse.y */
+#define INITIAL_SYMBOL_TABLE_SIZE 128
+SymbolTable symbol_table = NULL;
+
/* Builtin test scripts */
typedef struct BuiltinScript
{
@@ -1215,39 +1226,16 @@ doConnect(void)
return conn;
}
-/* qsort comparator for Variable array */
-static int
-compareVariableNames(const void *v1, const void *v2)
-{
- return strcmp(((const Variable *) v1)->name,
- ((const Variable *) v2)->name);
-}
-
/* Locate a variable by name; returns NULL if unknown */
static Variable *
lookupVariable(CState *st, char *name)
{
- Variable key;
+ int num = getSymbolId(symbol_table, name, true);
- /* On some versions of Solaris, bsearch of zero items dumps core */
- if (st->nvariables <= 0)
+ if (0 <= num && num < st->nvariables)
+ return & st->variables[num];
+ else
return NULL;
-
- /* Sort if we have to */
- if (!st->vars_sorted)
- {
- qsort((void *) st->variables, st->nvariables, sizeof(Variable),
- compareVariableNames);
- st->vars_sorted = true;
- }
-
- /* Now we can search */
- key.name = name;
- return (Variable *) bsearch((void *) &key,
- (void *) st->variables,
- st->nvariables,
- sizeof(Variable),
- compareVariableNames);
}
/* Get the value of a variable, in string form; returns NULL if unknown */
@@ -1292,17 +1280,26 @@ makeVariableValue(Variable *var)
if (var->value.type != PGBT_NO_VALUE)
return true; /* no work */
+ if (var->svalue == NULL || var->svalue[0] == '\0')
+ return false;
+
slen = strlen(var->svalue);
- if (slen == 0)
- /* what should it do on ""? */
- return false;
+ /* try most probable first */
+ if (is_an_int(var->svalue))
+ {
+ /* if it looks like an int, it must be an int without overflow */
+ int64 iv;
- if (pg_strcasecmp(var->svalue, "null") == 0)
+ if (!strtoint64(var->svalue, false, &iv))
+ return false;
+
+ setIntValue(&var->value, iv);
+ }
+ else if (pg_strcasecmp(var->svalue, "null") == 0)
{
setNullValue(&var->value);
}
-
/*
* accept prefixes such as y, ye, n, no... but not for "o". 0/1 are
* recognized later as an int, which is converted to bool if needed.
@@ -1320,16 +1317,6 @@ makeVariableValue(Variable *var)
{
setBoolValue(&var->value, false);
}
- else if (is_an_int(var->svalue))
- {
- /* if it looks like an int, it must be an int without overflow */
- int64 iv;
-
- if (!strtoint64(var->svalue, false, &iv))
- return false;
-
- setIntValue(&var->value, iv);
- }
else /* type should be double */
{
double dv;
@@ -1338,7 +1325,8 @@ makeVariableValue(Variable *var)
{
fprintf(stderr,
"malformed variable \"%s\" value: \"%s\"\n",
- var->name, var->svalue);
+ var->number >= 0 ? getSymbolName(symbol_table, var->number) : "<null>",
+ var->svalue);
return false;
}
setDoubleValue(&var->value, dv);
@@ -1380,66 +1368,38 @@ valid_variable_name(const char *name)
return true;
}
-/*
- * Lookup a variable by name, creating it if need be.
- * Caller is expected to assign a value to the variable.
- * Returns NULL on failure (bad name).
+/* Assign a string value to a variable, creating it if need be.
+ *
+ * Returns false on failure (bad name)
*/
-static Variable *
-lookupCreateVariable(CState *st, const char *context, char *name)
-{
- Variable *var;
-
- var = lookupVariable(st, name);
- if (var == NULL)
- {
- Variable *newvars;
-
- /*
- * Check for the name only when declaring a new variable to avoid
- * overhead.
- */
- if (!valid_variable_name(name))
- {
- fprintf(stderr, "%s: invalid variable name: \"%s\"\n",
- context, name);
- return NULL;
- }
-
- /* Create variable at the end of the array */
- if (st->variables)
- newvars = (Variable *) pg_realloc(st->variables,
- (st->nvariables + 1) * sizeof(Variable));
- else
- newvars = (Variable *) pg_malloc(sizeof(Variable));
-
- st->variables = newvars;
-
- var = &newvars[st->nvariables];
-
- var->name = pg_strdup(name);
- var->svalue = NULL;
- /* caller is expected to initialize remaining fields */
-
- st->nvariables++;
- /* we don't re-sort the array till we have to */
- st->vars_sorted = false;
- }
-
- return var;
-}
-
-/* Assign a string value to a variable, creating it if need be */
-/* Returns false on failure (bad name) */
static bool
putVariable(CState *st, const char *context, char *name, const char *value)
{
+ int num = getOrCreateSymbolId(symbol_table, name);
Variable *var;
char *val;
- var = lookupCreateVariable(st, context, name);
- if (!var)
- return false;
+ if (num < 0 || st->nvariables <= num)
+ {
+ fprintf(stderr, "%s: too many variables while adding \"%s\"\n",
+ context, name);
+ abort();
+ }
+
+ var = & st->variables[num];
+
+ if (var->number == -1)
+ {
+ /* first time the variable is encountered, do some checks */
+ if (!valid_variable_name(name))
+ {
+ fprintf(stderr, "%s: invalid variable name \"%s\"\n",
+ context, name);
+ return false;
+ }
+
+ var->number = num;
+ }
/* dup then free, in case value is pointing at this variable */
val = pg_strdup(value);
@@ -1452,35 +1412,62 @@ putVariable(CState *st, const char *context, char *name, const char *value)
return true;
}
-/* Assign a value to a variable, creating it if need be */
-/* Returns false on failure (bad name) */
+/* shortcut version if variable number is known */
static bool
-putVariableValue(CState *st, const char *context, char *name,
- const PgBenchValue *value)
+putSymbolValue(CState *st, const char *context, int var_num,
+ const PgBenchValue *value)
{
- Variable *var;
+ Variable *var;
- var = lookupCreateVariable(st, context, name);
- if (!var)
+ if (var_num < 0 || var_num >= st->nvariables)
+ {
+ fprintf(stderr, "%s: unexpected variable number %d\n",
+ context, var_num);
return false;
+ }
+
+ var = & st->variables[var_num];
+
+ if (unlikely(var->number == -1))
+ var->number = var_num;
if (var->svalue)
+ {
free(var->svalue);
- var->svalue = NULL;
+ var->svalue = NULL;
+ }
var->value = *value;
return true;
}
-/* Assign an integer value to a variable, creating it if need be */
-/* Returns false on failure (bad name) */
+/* shortcut version with variable number */
static bool
-putVariableInt(CState *st, const char *context, char *name, int64 value)
+putSymbolInt(CState *st, const char *context, int var_num, int64 value)
{
- PgBenchValue val;
+ Variable *var;
- setIntValue(&val, value);
- return putVariableValue(st, context, name, &val);
+ if (var_num < 0 || var_num >= st->nvariables)
+ {
+ fprintf(stderr, "%s: unexpected variable number %d\n",
+ context, var_num);
+ return false;
+ }
+
+ var = & st->variables[var_num];
+
+ if (unlikely(var->number == -1))
+ var->number = var_num;
+
+ if (var->svalue)
+ {
+ free(var->svalue);
+ var->svalue = NULL;
+ }
+
+ setIntValue(&var->value, value);
+
+ return true;
}
/*
@@ -2423,16 +2410,18 @@ evaluateExpr(CState *st, PgBenchExpr *expr, PgBenchValue *retval)
case ENODE_VARIABLE:
{
Variable *var;
+ int num = expr->u.variable.varnum;
- if ((var = lookupVariable(st, expr->u.variable.varname)) == NULL)
- {
- fprintf(stderr, "undefined variable \"%s\"\n",
- expr->u.variable.varname);
- return false;
- }
+ Assert(0 <= num && num < st->nvariables);
+
+ var = & st->variables[num];
if (!makeVariableValue(var))
+ {
+ fprintf(stderr, "no value for variable \"%s\"\n",
+ expr->u.variable.varname);
return false;
+ }
*retval = var->value;
return true;
@@ -2490,7 +2479,7 @@ getMetaCommand(const char *cmd)
* Return true if succeeded, or false on error.
*/
static bool
-runShellCommand(CState *st, char *variable, char **argv, int argc)
+runShellCommand(CState *st, int var_num, char **argv, int argc)
{
char command[SHELL_COMMAND_SIZE];
int i,
@@ -2544,7 +2533,7 @@ runShellCommand(CState *st, char *variable, char **argv, int argc)
command[len] = '\0';
/* Fast path for non-assignment case */
- if (variable == NULL)
+ if (var_num == -1)
{
if (system(command))
{
@@ -2584,7 +2573,8 @@ runShellCommand(CState *st, char *variable, char **argv, int argc)
argv[0], res);
return false;
}
- if (!putVariableInt(st, "setshell", variable, retval))
+
+ if (var_num != -1 && !putSymbolInt(st, "setshell", var_num, retval))
return false;
#ifdef DEBUG
@@ -3344,7 +3334,7 @@ executeMetaCommand(CState *st, instr_time *now)
return CSTATE_ABORTED;
}
- if (!putVariableValue(st, argv[0], argv[1], &result))
+ if (!putSymbolValue(st, argv[0], command->set_varnum, &result))
{
commandFailed(st, "set", "assignment of meta-command failed");
return CSTATE_ABORTED;
@@ -3414,7 +3404,7 @@ executeMetaCommand(CState *st, instr_time *now)
}
else if (command->meta == META_SETSHELL)
{
- if (!runShellCommand(st, argv[1], argv + 2, argc - 2))
+ if (!runShellCommand(st, command->set_varnum, argv + 2, argc - 2))
{
commandFailed(st, "setshell", "execution of meta-command failed");
return CSTATE_ABORTED;
@@ -3422,7 +3412,7 @@ executeMetaCommand(CState *st, instr_time *now)
}
else if (command->meta == META_SHELL)
{
- if (!runShellCommand(st, NULL, argv + 1, argc - 1))
+ if (!runShellCommand(st, -1, argv + 1, argc - 1))
{
commandFailed(st, "shell", "execution of meta-command failed");
return CSTATE_ABORTED;
@@ -4280,6 +4270,12 @@ process_backslash_command(PsqlScanState sstate, const char *source)
offsets[j] = word_offset;
my_command->argv[j++] = pg_strdup(word_buf.data);
my_command->argc++;
+
+ if (!valid_variable_name(word_buf.data))
+ syntax_error(source, lineno, my_command->first_line, my_command->argv[0],
+ "invalid variable name", word_buf.data, -1);
+
+ my_command->set_varnum = getOrCreateSymbolId(symbol_table, word_buf.data);
}
/* then for all parse the expression */
@@ -4377,6 +4373,12 @@ process_backslash_command(PsqlScanState sstate, const char *source)
if (my_command->argc < 3)
syntax_error(source, lineno, my_command->first_line, my_command->argv[0],
"missing argument", NULL, -1);
+
+ if (!valid_variable_name(my_command->argv[1]))
+ syntax_error(source, lineno, my_command->first_line, my_command->argv[0],
+ "invalid variable name", my_command->argv[1], -1);
+
+ my_command->set_varnum = getOrCreateSymbolId(symbol_table, my_command->argv[1]);
}
else if (my_command->meta == META_SHELL)
{
@@ -5154,6 +5156,8 @@ main(int argc, char **argv)
int i;
int nclients_dealt;
+ int scale_symbol, client_id_symbol,
+ random_seed_symbol, default_seed_symbol;
#ifdef HAVE_GETRLIMIT
struct rlimit rlim;
@@ -5190,6 +5194,21 @@ main(int argc, char **argv)
login = env;
state = (CState *) pg_malloc0(sizeof(CState));
+ state->nvariables = INITIAL_SYMBOL_TABLE_SIZE;
+ state->variables = pg_malloc(sizeof(Variable) * state->nvariables);
+ for (int i = 0; i < state->nvariables; i++)
+ {
+ state->variables[i].number = -1;
+ state->variables[i].svalue = NULL;
+ state->variables[i].value.type = PGBT_NO_VALUE;
+ }
+
+ /* symbol table */
+ symbol_table = newSymbolTable();
+ scale_symbol = getOrCreateSymbolId(symbol_table, "scale");
+ client_id_symbol = getOrCreateSymbolId(symbol_table, "client_id");
+ random_seed_symbol = getOrCreateSymbolId(symbol_table, "random_seed");
+ default_seed_symbol = getOrCreateSymbolId(symbol_table, "default_seed");
/* set random seed early, because it may be used while parsing scripts. */
if (!set_random_seed(getenv("PGBENCH_RANDOM_SEED")))
@@ -5649,6 +5668,8 @@ main(int argc, char **argv)
exit(1);
}
+ // dumpSymbolTable(stderr, symbol_table);
+
/*
* save main process id in the global variable because process id will be
* changed after fork.
@@ -5657,32 +5678,28 @@ main(int argc, char **argv)
if (nclients > 1)
{
+ int nvars = numberOfSymbols(symbol_table) + INITIAL_SYMBOL_TABLE_SIZE;
+
state = (CState *) pg_realloc(state, sizeof(CState) * nclients);
memset(state + 1, 0, sizeof(CState) * (nclients - 1));
+ state[0].variables = pg_realloc(state[0].variables, sizeof(Variable) * nvars);
+ state[0].nvariables = nvars;
+
+ for (i = INITIAL_SYMBOL_TABLE_SIZE; i < nvars; i++)
+ {
+ state[0].variables[i].number = -1;
+ state[0].variables[i].svalue = NULL;
+ state[0].variables[i].value.type = PGBT_NO_VALUE;
+ }
+
/* copy any -D switch values to all clients */
for (i = 1; i < nclients; i++)
{
- int j;
-
state[i].id = i;
- for (j = 0; j < state[0].nvariables; j++)
- {
- Variable *var = &state[0].variables[j];
-
- if (var->value.type != PGBT_NO_VALUE)
- {
- if (!putVariableValue(&state[i], "startup",
- var->name, &var->value))
- exit(1);
- }
- else
- {
- if (!putVariable(&state[i], "startup",
- var->name, var->svalue))
- exit(1);
- }
- }
+ state[i].variables = pg_malloc(sizeof(Variable) * nvars);
+ state[i].nvariables = nvars;
+ memcpy(state[i].variables, state[0].variables, sizeof(Variable) * nvars);
}
}
@@ -5754,43 +5771,41 @@ main(int argc, char **argv)
* :scale variables normally get -s or database scale, but don't override
* an explicit -D switch
*/
- if (lookupVariable(&state[0], "scale") == NULL)
+ if (undefined_variable_value(state[0].variables[scale_symbol]))
{
for (i = 0; i < nclients; i++)
- {
- if (!putVariableInt(&state[i], "startup", "scale", scale))
+ if (!putSymbolInt(&state[i], "startup", scale_symbol, scale))
exit(1);
- }
}
/*
* Define a :client_id variable that is unique per connection. But don't
* override an explicit -D switch.
*/
- if (lookupVariable(&state[0], "client_id") == NULL)
+ if (undefined_variable_value(state[0].variables[client_id_symbol]))
{
for (i = 0; i < nclients; i++)
- if (!putVariableInt(&state[i], "startup", "client_id", i))
+ if (!putSymbolInt(&state[i], "startup", client_id_symbol, i))
exit(1);
}
/* set default seed for hash functions */
- if (lookupVariable(&state[0], "default_seed") == NULL)
+ if (undefined_variable_value(state[0].variables[default_seed_symbol]))
{
uint64 seed =
((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) |
(((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) << 32);
for (i = 0; i < nclients; i++)
- if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed))
+ if (!putSymbolInt(&state[i], "startup", default_seed_symbol, (int64) seed))
exit(1);
}
/* set random seed unless overwritten */
- if (lookupVariable(&state[0], "random_seed") == NULL)
+ if (undefined_variable_value(state[0].variables[random_seed_symbol]))
{
for (i = 0; i < nclients; i++)
- if (!putVariableInt(&state[i], "startup", "random_seed", random_seed))
+ if (!putSymbolInt(&state[i], "startup", random_seed_symbol, random_seed))
exit(1);
}
@@ -5930,6 +5945,8 @@ main(int argc, char **argv)
if (exit_code != 0)
fprintf(stderr, "Run was aborted; the above results are incomplete.\n");
+ freeSymbolTable(symbol_table);
+
return exit_code;
}
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index c4a1e298e0..243f59a8ee 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -115,6 +115,7 @@ struct PgBenchExpr
struct
{
char *varname;
+ int varnum;
} variable;
struct
{
@@ -163,4 +164,21 @@ extern void syntax_error(const char *source, int lineno, const char *line,
extern bool strtoint64(const char *str, bool errorOK, int64 *pi);
extern bool strtodouble(const char *str, bool errorOK, double *pd);
+/*
+ * A symbol table is an opaque data structure
+ */
+struct SymbolTable_st;
+typedef struct SymbolTable_st *SymbolTable;
+
+SymbolTable newSymbolTable(void);
+void freeSymbolTable(SymbolTable st);
+void dumpSymbolTable(FILE *out, SymbolTable st);
+int numberOfSymbols(SymbolTable st);
+int getOrCreateSymbolId(SymbolTable st, const char *name);
+int getSymbolId(SymbolTable st, const char *name, bool check);
+char *getSymbolName(SymbolTable st, int number);
+int rmSymbolId(SymbolTable st, const char *name);
+
+extern SymbolTable symbol_table;
+
#endif /* PGBENCH_H */
diff --git a/src/bin/pgbench/symbol_table.c b/src/bin/pgbench/symbol_table.c
new file mode 100644
index 0000000000..e9229ea74b
--- /dev/null
+++ b/src/bin/pgbench/symbol_table.c
@@ -0,0 +1,317 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(ENABLE_THREAD_SAFETY)
+#include <pthread.h>
+#else
+#define pthread_mutex_t void *
+#define pthread_mutex_init(m, p) 0
+#define pthread_mutex_lock(m)
+#define pthread_mutex_unlock(m)
+#define pthread_mutex_destroy(m)
+#endif
+
+typedef enum { false, true } bool;
+
+struct SymbolTable_st;
+typedef struct SymbolTable_st *SymbolTable;
+SymbolTable newSymbolTable(void);
+void freeSymbolTable(SymbolTable st);
+void dumpSymbolTable(FILE *out, SymbolTable st);
+int numberOfSymbols(SymbolTable st);
+int getOrCreateSymbolId(SymbolTable st, const char *name);
+int getSymbolId(SymbolTable st, const char *name, bool check);
+char *getSymbolName(SymbolTable st, int number);
+int rmSymbolId(SymbolTable st, const char *name);
+
+/*----
+ *
+ * simplistic SymbolTable management.
+ *
+ * Implement a string -> int table based on a tree on bytes.
+ * The data structure makes more sense if many searches are expected.
+ *
+ * Each '\0'-terminated string is store in the tree at the first non-unique
+ * prefix, possibly including the final '\0' if the string is a prefix of
+ * another.
+ *
+ * Storage requirement n * len(s).
+ *
+ * Storing a symbol costs len(s).
+ *
+ * Finding a symbol costs len(s) if checked, len(unique_prefix(s)) if unchecked.
+ */
+
+typedef struct Symbol
+{
+ char *name;
+ int number;
+} Symbol;
+
+typedef struct SymbolTableTree
+{
+ bool is_leaf;
+ union {
+ Symbol symb; /* is_leaf */
+ struct SymbolTableTree *tree; /* ! ~ */
+ } u;
+} SymbolTableTree;
+
+struct SymbolTable_st
+{
+ int nsymbols; /* next symbol number */
+ int stored; /* current #symbols in st */
+ SymbolTableTree *root; /* symbol tree root */
+ int symbols_size; /* size of following array */
+ Symbol **symbols; /* array of existing symbols */
+ pthread_mutex_t lock;
+};
+
+/* allocate a branching node of empty leaves */
+static SymbolTableTree * newSymbolTableTree(void)
+{
+ SymbolTableTree * tree = malloc(sizeof(SymbolTableTree) * 256);
+
+ for (int i = 0; i < 256; i++)
+ {
+ tree[i].is_leaf = true;
+ tree[i].u.symb.name = NULL;
+ tree[i].u.symb.number = -1;
+ }
+
+ return tree;
+}
+
+/* allocate a symbol table */
+SymbolTable newSymbolTable(void)
+{
+ SymbolTable st = malloc(sizeof(struct SymbolTable_st));
+
+ st->nsymbols = 0;
+ st->stored = 0;
+ st->root = malloc(sizeof(SymbolTableTree));
+ st->root->is_leaf = true;
+ st->root->u.symb.name = NULL;
+ st->root->u.symb.number = -1;
+ st->symbols_size = 16;
+ st->symbols = malloc(sizeof(Symbol *) * st->symbols_size);
+ if (pthread_mutex_init(&st->lock, NULL) != 0)
+ abort();
+
+ return st;
+}
+
+/* recursively free a symbol table tree */
+static void freeSymbolTableTree(SymbolTableTree * tree)
+{
+ if (tree->is_leaf)
+ {
+ if (tree->u.symb.name != NULL)
+ free(tree->u.symb.name);
+ }
+ else
+ {
+ for (int i = 0; i < 256; i++)
+ freeSymbolTableTree(& tree->u.tree[i]);
+ free(tree->u.tree);
+ }
+}
+
+/* free symbol table st */
+void freeSymbolTable(SymbolTable st)
+{
+ freeSymbolTableTree(st->root);
+ free(st->root);
+ free(st->symbols);
+ pthread_mutex_destroy(&st->lock);
+ free(st);
+}
+
+/* how many symbols have been seen */
+int numberOfSymbols(SymbolTable st)
+{
+ return st->nsymbols;
+}
+
+/* dump tree to out, in byte order */
+static void dumpSymbolTableTree(FILE * out, SymbolTableTree * tree)
+{
+ if (tree == NULL)
+ fprintf(out, "(null)");
+ else if (tree->is_leaf)
+ {
+ if (tree->u.symb.name != NULL || tree->u.symb.number != -1)
+ fprintf(out, "%s -> %d\n", tree->u.symb.name, tree->u.symb.number);
+ }
+ else
+ for (int i = 0; i < 256; i++)
+ dumpSymbolTableTree(out, & tree->u.tree[i]);
+}
+
+void dumpSymbolTable(FILE * out, SymbolTable st)
+{
+ fprintf(out, "SymbolTable dump: %d symbols (%d seen)\n",
+ st->stored, st->nsymbols);
+ dumpSymbolTableTree(out, st->root);
+}
+
+/* add new symbol to a node. NOT thread-safe. */
+static int addSymbol(SymbolTable st, SymbolTableTree *node, const char *name)
+{
+ int number;
+
+ number = st->nsymbols++;
+ node->u.symb.name = strdup(name);
+ node->u.symb.number = number;
+
+ if (number >= st->symbols_size)
+ {
+ st->symbols_size *= 2;
+ st->symbols = realloc(st->symbols, sizeof(Symbol *) * st->symbols_size);
+ }
+
+ st->symbols[number] = & node->u.symb;
+ st->stored++;
+
+ return number;
+}
+
+/* get existing id or create a new one for name in st */
+int getOrCreateSymbolId(SymbolTable st, const char *name)
+{
+ SymbolTableTree *node = st->root;
+ int i = 0;
+
+ /* get down to a leaf */
+ while (!node->is_leaf)
+ node = & node->u.tree[(unsigned char) name[i++]];
+
+ if (node->u.symb.name == NULL)
+ {
+ int number;
+
+ /* empty leaf, let us use it */
+ pthread_mutex_lock(&st->lock);
+ number = addSymbol(st, node, name);
+ pthread_mutex_unlock(&st->lock);
+
+ return number;
+ }
+ else if ((i > 0 && strcmp(node->u.symb.name + i - 1, name + i - 1) == 0) ||
+ strcmp(node->u.symb.name, name) == 0)
+ /* already occupied by same name */
+ return node->u.symb.number;
+ else /* it contains another name */
+ {
+ Symbol prev;
+ int number;
+
+ /* should lock be acquire before? or redo the descent? */
+ pthread_mutex_lock(&st->lock);
+ prev = node->u.symb;
+
+ /* expand tree from current node */
+ do
+ {
+ node->is_leaf = false;
+ node->u.tree = newSymbolTableTree();
+ node->u.tree[(unsigned char) prev.name[i]].u.symb = prev;
+ node = & node->u.tree[(unsigned char) name[i++]];
+ } while (node->u.symb.name != NULL);
+
+ number = addSymbol(st, node, name);
+
+ pthread_mutex_unlock(&st->lock);
+
+ return number;
+ }
+}
+
+/* return symbol number that exists or -1 */
+int getSymbolId(SymbolTable st, const char * name, bool check)
+{
+ SymbolTableTree *node = st->root;
+ int i = 0;
+
+ while (!node->is_leaf)
+ node = & node->u.tree[(unsigned char) name[i++]];
+
+ // - 1 if tree included final '\0' character
+ if (node->u.symb.name != NULL &&
+ (!check ||
+ (i>0 && strcmp(node->u.symb.name + i - 1, name + i - 1) == 0) ||
+ strcmp(node->u.symb.name, name) == 0))
+ return node->u.symb.number;
+ else
+ return -1;
+}
+
+/* return symbol name if exists, or NULL */
+char *getSymbolName(SymbolTable st, int number)
+{
+ return 0 <= number && number < st->nsymbols ? st->symbols[number]->name : NULL;
+}
+
+/* remove name from st, return its number or -1 if not found */
+int rmSymbolId(SymbolTable st, const char * name)
+{
+ SymbolTableTree *node;
+ int i = 0,
+ num;
+
+ pthread_mutex_lock(&st->lock);
+
+ node = st->root;
+ while (!node->is_leaf)
+ node = & node->u.tree[(unsigned char) name[i++]];
+
+ if (node->u.symb.name != NULL && strcmp(node->u.symb.name, name) == 0)
+ {
+ num = node->u.symb.number;
+
+ free(node->u.symb.name);
+ node->u.symb.name = NULL;
+ node->u.symb.number = -1;
+ st->stored--;
+ st->symbols[num] = NULL;
+ }
+ else
+ num = -1;
+
+ pthread_mutex_unlock(&st->lock);
+
+ return num;
+}
+
+#ifdef WITH_MAIN
+int main(void)
+{
+ char *names[27] = {
+ "calvin", "hobbes", "calvin", "susie", "moe", "rosalyn",
+ "v", "va", "var", "var1", "var2", "var3",
+ "var11", "var1", "var11", "hobbes", "moe", "v", "",
+ "é", "été", "ét", "étage",
+ "ä½ å¥½!", "ä½ å¥½", "ä½ ",
+ NULL
+ };
+
+ SymbolTable st = newSymbolTable();
+
+ for (int i = 0; names[i] != NULL; i++)
+ {
+ fprintf(stdout, "# %s (%ld)\n", names[i], strlen(names[i]));
+ int prev = getSymbolId(st, names[i], true);
+ int nc = getSymbolId(st, names[i], false);
+ int add = getOrCreateSymbolId(st, names[i]);
+ int post = getSymbolId(st, names[i], false);
+ int rm = rmSymbolId(st, "moe");
+ fprintf(stdout, "%d(%d) %d %d %d\n", prev, nc, add, post, rm);
+ }
+
+ dumpSymbolTable(stderr, st);
+ freeSymbolTable(st);
+
+ return 0;
+}
+#endif
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index b82d3f65c4..493f9ec883 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -622,14 +622,14 @@ SELECT LEAST(} . join(', ', (':i') x 256) . q{)}
[qr{unexpected function name}], q{\set i noSuchFunction()}
],
[
- 'set invalid variable name', 2,
+ 'set invalid variable name', 1,
[qr{invalid variable name}], q{\set . 1}
],
[ 'set division by zero', 2, [qr{division by zero}], q{\set i 1/0} ],
[
'set undefined variable',
2,
- [qr{undefined variable "nosuchvariable"}],
+ [qr{no value for variable "nosuchvariable"}],
q{\set i :nosuchvariable}
],
[ 'set unexpected char', 1, [qr{unexpected character .;.}], q{\set i ;} ],