I wrote:
> Maybe we should revert this thing pending somebody doing the work to
> make a version of queryid labeling that actually is negligibly cheap.
> It certainly seems like that could be done; one more traversal of the
> parse tree can't be that expensive in itself. I suspect that the
> performance problem is with the particular hashing mechanism that
> was used, which looks mighty ad-hoc anyway.
To put a little bit of meat on that idea, I experimented with jacking
up the "jumble" calculation and driving some other implementations
underneath.
I thought that Julien's "worst case" scenario was pretty far from
worst case, since it involved a join which a lot of simple queries
don't. I tested this scenario instead:
$ cat naive.sql
SELECT * FROM pg_class c ORDER BY oid DESC LIMIT 1;
$ pgbench -n -f naive.sql -T 60 postgres
which is still complicated enough that there's work for the
query fingerprinter to do, but not so much for planning and
execution.
I confirm that on HEAD, there's a noticeable TPS penalty from
turning on compute_query_id: about 3.2% on my machine.
The first patch attached replaces the "jumble" calculation
with two CRC32s (two so that we still get 64 bits out at
the end). I see 2.7% penalty with this version. Now,
I'm using an Intel machine with
#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1
so on machines without any hardware CRC support, this'd
likely be a loss. But it still proves the point that the
existing implementation is just not very speedy.
I then tried a really dumb xor'ing implementation, and
that gives me a slowdown of 2.2%. This could undoubtedly
be improved on further, say by unrolling the loop or
processing multiple bytes at once. One problem with it
is that I suspect it will tend to concentrate the entropy
into the third/fourth and seventh/eighth bytes of the
accumulator, since so many of the fields being jumbled
are 4-byte or 8-byte fields with most of the entropy in
their low-order bits. Probably that could be improved
with a bit more thought -- say, an extra bump of the
nextbyte pointer after each field.
Anyway, I think that what we have here is quite an inferior
implementation, and we can do better with some more thought.
regards, tom lane
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index f004a9ce8c..74ed555ed7 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -41,7 +41,7 @@
static uint64 compute_utility_query_id(const char *str, int query_location, int query_len);
static void AppendJumble(JumbleState *jstate,
- const unsigned char *item, Size size);
+ const void *item, Size size);
static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
@@ -106,9 +106,11 @@ JumbleQuery(Query *query, const char *querytext)
{
jstate = (JumbleState *) palloc(sizeof(JumbleState));
- /* Set up workspace for query jumbling */
- jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
- jstate->jumble_len = 0;
+ /* Initialize CRCs for query jumbling */
+ INIT_CRC32C(jstate->crc0);
+ INIT_CRC32C(jstate->crc1);
+ jstate->whichcrc = 0;
+ /* Initialize state for tracking where constants are */
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
palloc(jstate->clocations_buf_size * sizeof(LocationLen));
@@ -117,9 +119,11 @@ JumbleQuery(Query *query, const char *querytext)
/* Compute query ID and mark the Query node with it */
JumbleQueryInternal(jstate, query);
- query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
- jstate->jumble_len,
- 0));
+
+ FIN_CRC32C(jstate->crc0);
+ FIN_CRC32C(jstate->crc1);
+ query->queryId = (((uint64) jstate->crc0) << 32) |
+ ((uint64) jstate->crc1);
/*
* If we are unlucky enough to get a hash of zero, use 1 instead, to
@@ -165,36 +169,18 @@ compute_utility_query_id(const char *query_text, int query_location, int query_l
* the current jumble.
*/
static void
-AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
+AppendJumble(JumbleState *jstate, const void *item, Size size)
{
- unsigned char *jumble = jstate->jumble;
- Size jumble_len = jstate->jumble_len;
-
- /*
- * Whenever the jumble buffer is full, we hash the current contents and
- * reset the buffer to contain just that hash value, thus relying on the
- * hash to summarize everything so far.
- */
- while (size > 0)
+ if (jstate->whichcrc)
{
- Size part_size;
-
- if (jumble_len >= JUMBLE_SIZE)
- {
- uint64 start_hash;
-
- start_hash = DatumGetUInt64(hash_any_extended(jumble,
- JUMBLE_SIZE, 0));
- memcpy(jumble, &start_hash, sizeof(start_hash));
- jumble_len = sizeof(start_hash);
- }
- part_size = Min(size, JUMBLE_SIZE - jumble_len);
- memcpy(jumble + jumble_len, item, part_size);
- jumble_len += part_size;
- item += part_size;
- size -= part_size;
+ COMP_CRC32C(jstate->crc1, item, size);
+ jstate->whichcrc = 0;
+ }
+ else
+ {
+ COMP_CRC32C(jstate->crc0, item, size);
+ jstate->whichcrc = 1;
}
- jstate->jumble_len = jumble_len;
}
/*
@@ -202,9 +188,9 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
* of individual local variable elements.
*/
#define APP_JUMB(item) \
- AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
+ AppendJumble(jstate, (const void *) &(item), sizeof(item))
#define APP_JUMB_STRING(str) \
- AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
+ AppendJumble(jstate, (const void *) (str), strlen(str) + 1)
/*
* JumbleQueryInternal: Selectively serialize the query tree, appending
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 83ba7339fa..3a09c555fa 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,8 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
-
-#define JUMBLE_SIZE 1024 /* query serialization buffer size */
+#include "port/pg_crc32c.h"
/*
* Struct for tracking locations/lengths of constants during normalization
@@ -33,11 +32,13 @@ typedef struct LocationLen
*/
typedef struct JumbleState
{
- /* Jumble of current query tree */
- unsigned char *jumble;
-
- /* Number of bytes used in jumble[] */
- Size jumble_len;
+ /*
+ * Since we'd like a 64-bit-wide query ID, but we're using 32-bit CRC
+ * technology, we combine two 32-bit CRCs.
+ */
+ pg_crc32c crc0; /* some fields go into here ... */
+ pg_crc32c crc1; /* ... and others into here */
+ int whichcrc; /* 0 or 1, says which CRC accum to use next */
/* Array of locations of constants that should be removed */
LocationLen *clocations;
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index f004a9ce8c..225940d40c 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -106,9 +106,10 @@ JumbleQuery(Query *query, const char *querytext)
{
jstate = (JumbleState *) palloc(sizeof(JumbleState));
- /* Set up workspace for query jumbling */
- jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
- jstate->jumble_len = 0;
+ /* Initialize state for query jumbling */
+ jstate->j.id = 0;
+ jstate->nextbyte = 0;
+ /* Initialize state for tracking where constants are */
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
palloc(jstate->clocations_buf_size * sizeof(LocationLen));
@@ -117,9 +118,7 @@ JumbleQuery(Query *query, const char *querytext)
/* Compute query ID and mark the Query node with it */
JumbleQueryInternal(jstate, query);
- query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
- jstate->jumble_len,
- 0));
+ query->queryId = jstate->j.id;
/*
* If we are unlucky enough to get a hash of zero, use 1 instead, to
@@ -167,34 +166,17 @@ compute_utility_query_id(const char *query_text, int query_location, int query_l
static void
AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
{
- unsigned char *jumble = jstate->jumble;
- Size jumble_len = jstate->jumble_len;
+ int nextbyte = jstate->nextbyte;
- /*
- * Whenever the jumble buffer is full, we hash the current contents and
- * reset the buffer to contain just that hash value, thus relying on the
- * hash to summarize everything so far.
- */
while (size > 0)
{
- Size part_size;
-
- if (jumble_len >= JUMBLE_SIZE)
- {
- uint64 start_hash;
-
- start_hash = DatumGetUInt64(hash_any_extended(jumble,
- JUMBLE_SIZE, 0));
- memcpy(jumble, &start_hash, sizeof(start_hash));
- jumble_len = sizeof(start_hash);
- }
- part_size = Min(size, JUMBLE_SIZE - jumble_len);
- memcpy(jumble + jumble_len, item, part_size);
- jumble_len += part_size;
- item += part_size;
- size -= part_size;
+ jstate->j.bytes[nextbyte] ^= *item++;
+ nextbyte++;
+ if (nextbyte >= sizeof(uint64))
+ nextbyte = 0;
+ size--;
}
- jstate->jumble_len = jumble_len;
+ jstate->nextbyte = nextbyte;
}
/*
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 83ba7339fa..d01712af4d 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -16,8 +16,6 @@
#include "nodes/parsenodes.h"
-#define JUMBLE_SIZE 1024 /* query serialization buffer size */
-
/*
* Struct for tracking locations/lengths of constants during normalization
*/
@@ -33,11 +31,13 @@ typedef struct LocationLen
*/
typedef struct JumbleState
{
- /* Jumble of current query tree */
- unsigned char *jumble;
-
- /* Number of bytes used in jumble[] */
- Size jumble_len;
+ /* Working state for accumulating a 64-bit query ID */
+ union
+ {
+ unsigned char bytes[sizeof(uint64)];
+ uint64 id;
+ } j;
+ int nextbyte; /* index of next byte to update in j.bytes[] */
/* Array of locations of constants that should be removed */
LocationLen *clocations;