changeset: 6899:ebb93147aec7 user: Kevin McCarthy <ke...@8t8.us> date: Fri Jan 06 14:17:10 2017 -0800 link: http://dev.mutt.org/hg/mutt/rev/ebb93147aec7
Convert HASH to be indexable by unsigned int. (see #3905) Convert the HASH to be usable for either string or unsigned int keys, so that a uid hash can be added for imap. To keep hash-usage code disruption to a minimum, this introduces new create/insert/find/delete functions for the int hash, but keeps the old function names for string keys. This implementation makes the key a union. It may have been a better idea to introduce a whole new structure, but this way allows minimum changes to and maximum reuse of the existing hash code. changeset: 6900:1ad1013cbf5b user: Kevin McCarthy <ke...@8t8.us> date: Fri Jan 06 14:23:28 2017 -0800 link: http://dev.mutt.org/hg/mutt/rev/1ad1013cbf5b Create a uid hash for imap. (see #3905) This hash will allow for more efficient UID SEARCH processing, replacing a linear scan with a hash lookup. changeset: 6901:7c0e7a0769e4 user: Kevin McCarthy <ke...@8t8.us> date: Fri Jan 06 14:37:48 2017 -0800 link: http://dev.mutt.org/hg/mutt/rev/7c0e7a0769e4 Convert cmd_parse_search to use the uid hash. (closes #3905) Replace the linear scan for each result with a hash lookup. This should greatly improve performance for large mailboxes. diffs (445 lines): diff -r 4f0a84b954ef -r 7c0e7a0769e4 hash.c --- a/hash.c Wed Jan 04 19:45:59 2017 -0800 +++ b/hash.c Fri Jan 06 14:37:48 2017 -0800 @@ -29,9 +29,10 @@ #define SOMEPRIME 149711 -static unsigned int hash_string (const unsigned char *s, unsigned int n) +static unsigned int gen_string_hash (union hash_key key, unsigned int n) { unsigned int h = 0; + unsigned char *s = (unsigned char *)key.strkey; while (*s) h += (h << 7) + *s++; @@ -40,9 +41,15 @@ return h; } -static unsigned int hash_case_string (const unsigned char *s, unsigned int n) +static int cmp_string_key (union hash_key a, union hash_key b) +{ + return mutt_strcmp (a.strkey, b.strkey); +} + +static unsigned int gen_case_string_hash (union hash_key key, unsigned int n) { unsigned int h = 0; + unsigned char *s = (unsigned char *)key.strkey; while (*s) h += (h << 7) + tolower (*s++); @@ -51,38 +58,71 @@ return h; } -HASH *hash_create (int nelem, int lower) +static int cmp_case_string_key (union hash_key a, union hash_key b) +{ + return mutt_strcasecmp (a.strkey, b.strkey); +} + +static unsigned int gen_int_hash (union hash_key key, unsigned int n) +{ + return key.intkey % n; +} + +static int cmp_int_key (union hash_key a, union hash_key b) +{ + if (a.intkey == b.intkey) + return 0; + if (a.intkey < b.intkey) + return -1; + return 1; +} + +static HASH *new_hash (int nelem) { HASH *table = safe_malloc (sizeof (HASH)); if (nelem == 0) nelem = 2; table->nelem = nelem; table->table = safe_calloc (nelem, sizeof (struct hash_elem *)); + return table; +} + +HASH *hash_create (int nelem, int lower) +{ + HASH *table = new_hash (nelem); if (lower) { - table->hash_string = hash_case_string; - table->cmp_string = mutt_strcasecmp; + table->gen_hash = gen_case_string_hash; + table->cmp_key = cmp_case_string_key; } else { - table->hash_string = hash_string; - table->cmp_string = mutt_strcmp; + table->gen_hash = gen_string_hash; + table->cmp_key = cmp_string_key; } return table; } +HASH *int_hash_create (int nelem) +{ + HASH *table = new_hash (nelem); + table->gen_hash = gen_int_hash; + table->cmp_key = cmp_int_key; + return table; +} + /* table hash table to update * key key to hash on * data data to associate with `key' * allow_dup if nonzero, duplicate keys are allowed in the table */ -int hash_insert (HASH * table, const char *key, void *data, int allow_dup) +static int union_hash_insert (HASH * table, union hash_key key, void *data, int allow_dup) { struct hash_elem *ptr; unsigned int h; ptr = (struct hash_elem *) safe_malloc (sizeof (struct hash_elem)); - h = table->hash_string ((unsigned char *) key, table->nelem); + h = table->gen_hash (key, table->nelem); ptr->key = key; ptr->data = data; @@ -98,7 +138,7 @@ for (tmp = table->table[h], last = NULL; tmp; last = tmp, tmp = tmp->next) { - r = table->cmp_string (tmp->key, key); + r = table->cmp_key (tmp->key, key); if (r == 0) { FREE (&ptr); @@ -116,33 +156,88 @@ return h; } -void *hash_find_hash (const HASH * table, int hash, const char *key) +int hash_insert (HASH * table, const char *strkey, void *data, int allow_dup) { - struct hash_elem *ptr = table->table[hash]; + union hash_key key; + key.strkey = strkey; + return union_hash_insert (table, key, data, allow_dup); +} + +int int_hash_insert (HASH * table, unsigned int intkey, void *data, int allow_dup) +{ + union hash_key key; + key.intkey = intkey; + return union_hash_insert (table, key, data, allow_dup); +} + +static void *union_hash_find (const HASH *table, union hash_key key) +{ + int hash; + struct hash_elem *ptr; + + if (!table) + return NULL; + + hash = table->gen_hash (key, table->nelem); + ptr = table->table[hash]; for (; ptr; ptr = ptr->next) { - if (table->cmp_string (key, ptr->key) == 0) + if (table->cmp_key (key, ptr->key) == 0) return (ptr->data); } return NULL; } -void hash_delete_hash (HASH * table, int hash, const char *key, const void *data, +void *hash_find (const HASH *table, const char *strkey) +{ + union hash_key key; + key.strkey = strkey; + return union_hash_find (table, key); +} + +void *int_hash_find (const HASH *table, unsigned int intkey) +{ + union hash_key key; + key.intkey = intkey; + return union_hash_find (table, key); +} + +struct hash_elem *hash_find_bucket (const HASH *table, const char *strkey) +{ + union hash_key key; + int hash; + + if (!table) + return NULL; + + key.strkey = strkey; + hash = table->gen_hash (key, table->nelem); + return table->table[hash]; +} + +static void union_hash_delete (HASH *table, union hash_key key, const void *data, void (*destroy) (void *)) { - struct hash_elem *ptr = table->table[hash]; - struct hash_elem **last = &table->table[hash]; + int hash; + struct hash_elem *ptr, **last; - while (ptr) + if (!table) + return; + + hash = table->gen_hash (key, table->nelem); + ptr = table->table[hash]; + last = &table->table[hash]; + + while (ptr) { if ((data == ptr->data || !data) - && table->cmp_string (ptr->key, key) == 0) + && table->cmp_key (ptr->key, key) == 0) { *last = ptr->next; if (destroy) destroy (ptr->data); FREE (&ptr); - + ptr = *last; } else @@ -153,15 +248,35 @@ } } +void hash_delete (HASH *table, const char *strkey, const void *data, + void (*destroy) (void *)) +{ + union hash_key key; + key.strkey = strkey; + union_hash_delete (table, key, data, destroy); +} + +void int_hash_delete (HASH *table, unsigned int intkey, const void *data, + void (*destroy) (void *)) +{ + union hash_key key; + key.intkey = intkey; + union_hash_delete (table, key, data, destroy); +} + /* ptr pointer to the hash table to be freed * destroy() function to call to free the ->data member (optional) */ void hash_destroy (HASH **ptr, void (*destroy) (void *)) { int i; - HASH *pptr = *ptr; + HASH *pptr; struct hash_elem *elem, *tmp; + if (!ptr || !*ptr) + return; + + pptr = *ptr; for (i = 0 ; i < pptr->nelem; i++) { for (elem = pptr->table[i]; elem; ) diff -r 4f0a84b954ef -r 7c0e7a0769e4 hash.h --- a/hash.h Wed Jan 04 19:45:59 2017 -0800 +++ b/hash.h Fri Jan 06 14:37:48 2017 -0800 @@ -19,9 +19,15 @@ #ifndef _HASH_H #define _HASH_H +union hash_key +{ + const char *strkey; + unsigned int intkey; +}; + struct hash_elem { - const char *key; + union hash_key key; void *data; struct hash_elem *next; }; @@ -30,20 +36,27 @@ { int nelem; struct hash_elem **table; - unsigned int (*hash_string)(const unsigned char *, unsigned int); - int (*cmp_string)(const char *, const char *); + unsigned int (*gen_hash)(union hash_key, unsigned int); + int (*cmp_key)(union hash_key, union hash_key); } HASH; -#define hash_find(table, key) hash_find_hash(table, table->hash_string ((unsigned char *)key, table->nelem), key) +HASH *hash_create (int nelem, int lower); +HASH *int_hash_create (int nelem); -#define hash_delete(table,key,data,destroy) hash_delete_hash(table, table->hash_string ((unsigned char *)key, table->nelem), key, data, destroy) +int hash_insert (HASH * table, const char *key, void *data, int allow_dup); +int int_hash_insert (HASH *table, unsigned int key, void *data, int allow_dup); -HASH *hash_create (int nelem, int lower); -int hash_insert (HASH * table, const char *key, void *data, int allow_dup); -void *hash_find_hash (const HASH * table, int hash, const char *key); -void hash_delete_hash (HASH * table, int hash, const char *key, const void *data, - void (*destroy) (void *)); +void *hash_find (const HASH *table, const char *key); +void *int_hash_find (const HASH *table, unsigned int key); + +struct hash_elem *hash_find_bucket (const HASH *table, const char *key); + +void hash_delete (HASH * table, const char *key, const void *data, + void (*destroy) (void *)); +void int_hash_delete (HASH * table, unsigned int key, const void *data, + void (*destroy) (void *)); + void hash_destroy (HASH ** hash, void (*destroy) (void *)); #endif diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/command.c --- a/imap/command.c Wed Jan 04 19:45:59 2017 -0800 +++ b/imap/command.c Fri Jan 06 14:37:48 2017 -0800 @@ -855,36 +855,20 @@ } } -/* This should be optimised (eg with a tree or hash) */ -static int uid2msgno (IMAP_DATA* idata, unsigned int uid) -{ - int i; - - for (i = 0; i < idata->ctx->msgcount; i++) - { - HEADER* h = idata->ctx->hdrs[i]; - if (HEADER_DATA(h)->uid == uid) - return i; - } - - return -1; -} - /* cmd_parse_search: store SEARCH response for later use */ static void cmd_parse_search (IMAP_DATA* idata, const char* s) { unsigned int uid; - int msgno; + HEADER *h; dprint (2, (debugfile, "Handling SEARCH\n")); while ((s = imap_next_word ((char*)s)) && *s != '\0') { - uid = atoi (s); - msgno = uid2msgno (idata, uid); - - if (msgno >= 0) - idata->ctx->hdrs[msgno]->matched = 1; + uid = (unsigned int)atoi (s); + h = (HEADER *)int_hash_find (idata->uid_hash, uid); + if (h) + h->matched = 1; } } diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/imap.c --- a/imap/imap.c Wed Jan 04 19:45:59 2017 -0800 +++ b/imap/imap.c Fri Jan 06 14:37:48 2017 -0800 @@ -280,6 +280,8 @@ FREE (&idata->cache[cacheno].path); } + int_hash_delete (idata->uid_hash, HEADER_DATA(h)->uid, h, NULL); + imap_free_header_data ((IMAP_HEADER_DATA**)&h->data); } } @@ -1392,6 +1394,7 @@ /* mailbox may not have fully loaded */ if (ctx->hdrs[i] && ctx->hdrs[i]->data) imap_free_header_data ((IMAP_HEADER_DATA**)&(ctx->hdrs[i]->data)); + hash_destroy (&idata->uid_hash, NULL); for (i = 0; i < IMAP_CACHE_LEN; i++) { diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/imap_private.h --- a/imap/imap_private.h Wed Jan 04 19:45:59 2017 -0800 +++ b/imap/imap_private.h Fri Jan 06 14:37:48 2017 -0800 @@ -214,6 +214,7 @@ unsigned char reopen; unsigned int newMailCount; IMAP_CACHE cache[IMAP_CACHE_LEN]; + HASH *uid_hash; unsigned int uid_validity; unsigned int uidnext; body_cache_t *bcache; diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/message.c --- a/imap/message.c Wed Jan 04 19:45:59 2017 -0800 +++ b/imap/message.c Fri Jan 06 14:37:48 2017 -0800 @@ -52,6 +52,23 @@ static int msg_parse_fetch (IMAP_HEADER* h, char* s); static char* msg_parse_flags (IMAP_HEADER* h, char* s); +static void imap_update_context (IMAP_DATA *idata, int oldmsgcount) +{ + CONTEXT *ctx; + HEADER *h; + int msgno; + + ctx = idata->ctx; + if (!idata->uid_hash) + idata->uid_hash = int_hash_create (MAX (6 * ctx->msgcount / 5, 30)); + + for (msgno = oldmsgcount; msgno < ctx->msgcount; msgno++) + { + h = ctx->hdrs[msgno]; + int_hash_insert (idata->uid_hash, HEADER_DATA(h)->uid, h, 0); + } +} + /* imap_read_headers: * Changed to read many headers instead of just one. It will return the * msgno of the last message read. It will return a value other than @@ -377,6 +394,7 @@ { mx_alloc_memory(ctx); mx_update_context (ctx, ctx->msgcount - oldmsgcount); + imap_update_context (idata, oldmsgcount); } idata->reopen |= IMAP_REOPEN_ALLOW; diff -r 4f0a84b954ef -r 7c0e7a0769e4 thread.c --- a/thread.c Wed Jan 04 19:45:59 2017 -0800 +++ b/thread.c Fri Jan 06 14:37:48 2017 -0800 @@ -416,7 +416,6 @@ { struct hash_elem *ptr; THREAD *tmp, *last = NULL; - unsigned int hash; LIST *subjects = NULL, *oldlist; time_t date = 0; @@ -424,9 +423,7 @@ while (subjects) { - hash = ctx->subj_hash->hash_string ((unsigned char *) subjects->data, - ctx->subj_hash->nelem); - for (ptr = ctx->subj_hash->table[hash]; ptr; ptr = ptr->next) + for (ptr = hash_find_bucket (ctx->subj_hash, subjects->data); ptr; ptr = ptr->next) { tmp = ((HEADER *) ptr->data)->thread; if (tmp != cur && /* don't match the same message */