I wrote: > Maybe we should revert this thing pending somebody doing the work to > make a version of queryid labeling that actually is negligibly cheap. > It certainly seems like that could be done; one more traversal of the > parse tree can't be that expensive in itself. I suspect that the > performance problem is with the particular hashing mechanism that > was used, which looks mighty ad-hoc anyway.
To put a little bit of meat on that idea, I experimented with jacking up the "jumble" calculation and driving some other implementations underneath. I thought that Julien's "worst case" scenario was pretty far from worst case, since it involved a join which a lot of simple queries don't. I tested this scenario instead: $ cat naive.sql SELECT * FROM pg_class c ORDER BY oid DESC LIMIT 1; $ pgbench -n -f naive.sql -T 60 postgres which is still complicated enough that there's work for the query fingerprinter to do, but not so much for planning and execution. I confirm that on HEAD, there's a noticeable TPS penalty from turning on compute_query_id: about 3.2% on my machine. The first patch attached replaces the "jumble" calculation with two CRC32s (two so that we still get 64 bits out at the end). I see 2.7% penalty with this version. Now, I'm using an Intel machine with #define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1 so on machines without any hardware CRC support, this'd likely be a loss. But it still proves the point that the existing implementation is just not very speedy. I then tried a really dumb xor'ing implementation, and that gives me a slowdown of 2.2%. This could undoubtedly be improved on further, say by unrolling the loop or processing multiple bytes at once. One problem with it is that I suspect it will tend to concentrate the entropy into the third/fourth and seventh/eighth bytes of the accumulator, since so many of the fields being jumbled are 4-byte or 8-byte fields with most of the entropy in their low-order bits. Probably that could be improved with a bit more thought -- say, an extra bump of the nextbyte pointer after each field. Anyway, I think that what we have here is quite an inferior implementation, and we can do better with some more thought. regards, tom lane
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c index f004a9ce8c..74ed555ed7 100644 --- a/src/backend/utils/misc/queryjumble.c +++ b/src/backend/utils/misc/queryjumble.c @@ -41,7 +41,7 @@ static uint64 compute_utility_query_id(const char *str, int query_location, int query_len); static void AppendJumble(JumbleState *jstate, - const unsigned char *item, Size size); + const void *item, Size size); static void JumbleQueryInternal(JumbleState *jstate, Query *query); static void JumbleRangeTable(JumbleState *jstate, List *rtable); static void JumbleRowMarks(JumbleState *jstate, List *rowMarks); @@ -106,9 +106,11 @@ JumbleQuery(Query *query, const char *querytext) { jstate = (JumbleState *) palloc(sizeof(JumbleState)); - /* Set up workspace for query jumbling */ - jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); - jstate->jumble_len = 0; + /* Initialize CRCs for query jumbling */ + INIT_CRC32C(jstate->crc0); + INIT_CRC32C(jstate->crc1); + jstate->whichcrc = 0; + /* Initialize state for tracking where constants are */ jstate->clocations_buf_size = 32; jstate->clocations = (LocationLen *) palloc(jstate->clocations_buf_size * sizeof(LocationLen)); @@ -117,9 +119,11 @@ JumbleQuery(Query *query, const char *querytext) /* Compute query ID and mark the Query node with it */ JumbleQueryInternal(jstate, query); - query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + + FIN_CRC32C(jstate->crc0); + FIN_CRC32C(jstate->crc1); + query->queryId = (((uint64) jstate->crc0) << 32) | + ((uint64) jstate->crc1); /* * If we are unlucky enough to get a hash of zero, use 1 instead, to @@ -165,36 +169,18 @@ compute_utility_query_id(const char *query_text, int query_location, int query_l * the current jumble. */ static void -AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) +AppendJumble(JumbleState *jstate, const void *item, Size size) { - unsigned char *jumble = jstate->jumble; - Size jumble_len = jstate->jumble_len; - - /* - * Whenever the jumble buffer is full, we hash the current contents and - * reset the buffer to contain just that hash value, thus relying on the - * hash to summarize everything so far. - */ - while (size > 0) + if (jstate->whichcrc) { - Size part_size; - - if (jumble_len >= JUMBLE_SIZE) - { - uint64 start_hash; - - start_hash = DatumGetUInt64(hash_any_extended(jumble, - JUMBLE_SIZE, 0)); - memcpy(jumble, &start_hash, sizeof(start_hash)); - jumble_len = sizeof(start_hash); - } - part_size = Min(size, JUMBLE_SIZE - jumble_len); - memcpy(jumble + jumble_len, item, part_size); - jumble_len += part_size; - item += part_size; - size -= part_size; + COMP_CRC32C(jstate->crc1, item, size); + jstate->whichcrc = 0; + } + else + { + COMP_CRC32C(jstate->crc0, item, size); + jstate->whichcrc = 1; } - jstate->jumble_len = jumble_len; } /* @@ -202,9 +188,9 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) * of individual local variable elements. */ #define APP_JUMB(item) \ - AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item)) + AppendJumble(jstate, (const void *) &(item), sizeof(item)) #define APP_JUMB_STRING(str) \ - AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1) + AppendJumble(jstate, (const void *) (str), strlen(str) + 1) /* * JumbleQueryInternal: Selectively serialize the query tree, appending diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h index 83ba7339fa..3a09c555fa 100644 --- a/src/include/utils/queryjumble.h +++ b/src/include/utils/queryjumble.h @@ -15,8 +15,7 @@ #define QUERYJUBLE_H #include "nodes/parsenodes.h" - -#define JUMBLE_SIZE 1024 /* query serialization buffer size */ +#include "port/pg_crc32c.h" /* * Struct for tracking locations/lengths of constants during normalization @@ -33,11 +32,13 @@ typedef struct LocationLen */ typedef struct JumbleState { - /* Jumble of current query tree */ - unsigned char *jumble; - - /* Number of bytes used in jumble[] */ - Size jumble_len; + /* + * Since we'd like a 64-bit-wide query ID, but we're using 32-bit CRC + * technology, we combine two 32-bit CRCs. + */ + pg_crc32c crc0; /* some fields go into here ... */ + pg_crc32c crc1; /* ... and others into here */ + int whichcrc; /* 0 or 1, says which CRC accum to use next */ /* Array of locations of constants that should be removed */ LocationLen *clocations;
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c index f004a9ce8c..225940d40c 100644 --- a/src/backend/utils/misc/queryjumble.c +++ b/src/backend/utils/misc/queryjumble.c @@ -106,9 +106,10 @@ JumbleQuery(Query *query, const char *querytext) { jstate = (JumbleState *) palloc(sizeof(JumbleState)); - /* Set up workspace for query jumbling */ - jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); - jstate->jumble_len = 0; + /* Initialize state for query jumbling */ + jstate->j.id = 0; + jstate->nextbyte = 0; + /* Initialize state for tracking where constants are */ jstate->clocations_buf_size = 32; jstate->clocations = (LocationLen *) palloc(jstate->clocations_buf_size * sizeof(LocationLen)); @@ -117,9 +118,7 @@ JumbleQuery(Query *query, const char *querytext) /* Compute query ID and mark the Query node with it */ JumbleQueryInternal(jstate, query); - query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + query->queryId = jstate->j.id; /* * If we are unlucky enough to get a hash of zero, use 1 instead, to @@ -167,34 +166,17 @@ compute_utility_query_id(const char *query_text, int query_location, int query_l static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) { - unsigned char *jumble = jstate->jumble; - Size jumble_len = jstate->jumble_len; + int nextbyte = jstate->nextbyte; - /* - * Whenever the jumble buffer is full, we hash the current contents and - * reset the buffer to contain just that hash value, thus relying on the - * hash to summarize everything so far. - */ while (size > 0) { - Size part_size; - - if (jumble_len >= JUMBLE_SIZE) - { - uint64 start_hash; - - start_hash = DatumGetUInt64(hash_any_extended(jumble, - JUMBLE_SIZE, 0)); - memcpy(jumble, &start_hash, sizeof(start_hash)); - jumble_len = sizeof(start_hash); - } - part_size = Min(size, JUMBLE_SIZE - jumble_len); - memcpy(jumble + jumble_len, item, part_size); - jumble_len += part_size; - item += part_size; - size -= part_size; + jstate->j.bytes[nextbyte] ^= *item++; + nextbyte++; + if (nextbyte >= sizeof(uint64)) + nextbyte = 0; + size--; } - jstate->jumble_len = jumble_len; + jstate->nextbyte = nextbyte; } /* diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h index 83ba7339fa..d01712af4d 100644 --- a/src/include/utils/queryjumble.h +++ b/src/include/utils/queryjumble.h @@ -16,8 +16,6 @@ #include "nodes/parsenodes.h" -#define JUMBLE_SIZE 1024 /* query serialization buffer size */ - /* * Struct for tracking locations/lengths of constants during normalization */ @@ -33,11 +31,13 @@ typedef struct LocationLen */ typedef struct JumbleState { - /* Jumble of current query tree */ - unsigned char *jumble; - - /* Number of bytes used in jumble[] */ - Size jumble_len; + /* Working state for accumulating a 64-bit query ID */ + union + { + unsigned char bytes[sizeof(uint64)]; + uint64 id; + } j; + int nextbyte; /* index of next byte to update in j.bytes[] */ /* Array of locations of constants that should be removed */ LocationLen *clocations;