changeset: 6899:ebb93147aec7
user:      Kevin McCarthy <ke...@8t8.us>
date:      Fri Jan 06 14:17:10 2017 -0800
link:      http://dev.mutt.org/hg/mutt/rev/ebb93147aec7

Convert HASH to be indexable by unsigned int. (see #3905)

Convert the HASH to be usable for either string or unsigned int keys,
so that a uid hash can be added for imap.

To keep hash-usage code disruption to a minimum, this introduces new
create/insert/find/delete functions for the int hash, but keeps the
old function names for string keys.

This implementation makes the key a union.  It may have been a better
idea to introduce a whole new structure, but this way allows minimum
changes to and maximum reuse of the existing hash code.

changeset: 6900:1ad1013cbf5b
user:      Kevin McCarthy <ke...@8t8.us>
date:      Fri Jan 06 14:23:28 2017 -0800
link:      http://dev.mutt.org/hg/mutt/rev/1ad1013cbf5b

Create a uid hash for imap. (see #3905)

This hash will allow for more efficient UID SEARCH processing,
replacing a linear scan with a hash lookup.

changeset: 6901:7c0e7a0769e4
user:      Kevin McCarthy <ke...@8t8.us>
date:      Fri Jan 06 14:37:48 2017 -0800
link:      http://dev.mutt.org/hg/mutt/rev/7c0e7a0769e4

Convert cmd_parse_search to use the uid hash. (closes #3905)

Replace the linear scan for each result with a hash lookup.  This
should greatly improve performance for large mailboxes.

diffs (445 lines):

diff -r 4f0a84b954ef -r 7c0e7a0769e4 hash.c
--- a/hash.c    Wed Jan 04 19:45:59 2017 -0800
+++ b/hash.c    Fri Jan 06 14:37:48 2017 -0800
@@ -29,9 +29,10 @@
 
 #define SOMEPRIME 149711
 
-static unsigned int hash_string (const unsigned char *s, unsigned int n)
+static unsigned int gen_string_hash (union hash_key key, unsigned int n)
 {
   unsigned int h = 0;
+  unsigned char *s = (unsigned char *)key.strkey;
 
   while (*s)
     h += (h << 7) + *s++;
@@ -40,9 +41,15 @@
   return h;
 }
 
-static unsigned int hash_case_string (const unsigned char *s, unsigned int n)
+static int cmp_string_key (union hash_key a, union hash_key b)
+{
+  return mutt_strcmp (a.strkey, b.strkey);
+}
+
+static unsigned int gen_case_string_hash (union hash_key key, unsigned int n)
 {
   unsigned int h = 0;
+  unsigned char *s = (unsigned char *)key.strkey;
 
   while (*s)
     h += (h << 7) + tolower (*s++);
@@ -51,38 +58,71 @@
   return h;
 }
 
-HASH *hash_create (int nelem, int lower)
+static int cmp_case_string_key (union hash_key a, union hash_key b)
+{
+  return mutt_strcasecmp (a.strkey, b.strkey);
+}
+
+static unsigned int gen_int_hash (union hash_key key, unsigned int n)
+{
+  return key.intkey % n;
+}
+
+static int cmp_int_key (union hash_key a, union hash_key b)
+{
+  if (a.intkey == b.intkey)
+    return 0;
+  if (a.intkey < b.intkey)
+    return -1;
+  return 1;
+}
+
+static HASH *new_hash (int nelem)
 {
   HASH *table = safe_malloc (sizeof (HASH));
   if (nelem == 0)
     nelem = 2;
   table->nelem = nelem;
   table->table = safe_calloc (nelem, sizeof (struct hash_elem *));
+  return table;
+}
+
+HASH *hash_create (int nelem, int lower)
+{
+  HASH *table = new_hash (nelem);
   if (lower)
   {
-    table->hash_string = hash_case_string;
-    table->cmp_string = mutt_strcasecmp;
+    table->gen_hash = gen_case_string_hash;
+    table->cmp_key = cmp_case_string_key;
   }
   else
   {
-    table->hash_string = hash_string;
-    table->cmp_string = mutt_strcmp;
+    table->gen_hash = gen_string_hash;
+    table->cmp_key = cmp_string_key;
   }
   return table;
 }
 
+HASH *int_hash_create (int nelem)
+{
+  HASH *table = new_hash (nelem);
+  table->gen_hash = gen_int_hash;
+  table->cmp_key = cmp_int_key;
+  return table;
+}
+
 /* table        hash table to update
  * key          key to hash on
  * data         data to associate with `key'
  * allow_dup    if nonzero, duplicate keys are allowed in the table 
  */
-int hash_insert (HASH * table, const char *key, void *data, int allow_dup)
+static int union_hash_insert (HASH * table, union hash_key key, void *data, 
int allow_dup)
 {
   struct hash_elem *ptr;
   unsigned int h;
 
   ptr = (struct hash_elem *) safe_malloc (sizeof (struct hash_elem));
-  h = table->hash_string ((unsigned char *) key, table->nelem);
+  h = table->gen_hash (key, table->nelem);
   ptr->key = key;
   ptr->data = data;
 
@@ -98,7 +138,7 @@
 
     for (tmp = table->table[h], last = NULL; tmp; last = tmp, tmp = tmp->next)
     {
-      r = table->cmp_string (tmp->key, key);
+      r = table->cmp_key (tmp->key, key);
       if (r == 0)
       {
        FREE (&ptr);
@@ -116,33 +156,88 @@
   return h;
 }
 
-void *hash_find_hash (const HASH * table, int hash, const char *key)
+int hash_insert (HASH * table, const char *strkey, void *data, int allow_dup)
 {
-  struct hash_elem *ptr = table->table[hash];
+  union hash_key key;
+  key.strkey = strkey;
+  return union_hash_insert (table, key, data, allow_dup);
+}
+
+int int_hash_insert (HASH * table, unsigned int intkey, void *data, int 
allow_dup)
+{
+  union hash_key key;
+  key.intkey = intkey;
+  return union_hash_insert (table, key, data, allow_dup);
+}
+
+static void *union_hash_find (const HASH *table, union hash_key key)
+{
+  int hash;
+  struct hash_elem *ptr;
+
+  if (!table)
+    return NULL;
+
+  hash = table->gen_hash (key, table->nelem);
+  ptr = table->table[hash];
   for (; ptr; ptr = ptr->next)
   {
-    if (table->cmp_string (key, ptr->key) == 0)
+    if (table->cmp_key (key, ptr->key) == 0)
       return (ptr->data);
   }
   return NULL;
 }
 
-void hash_delete_hash (HASH * table, int hash, const char *key, const void 
*data,
+void *hash_find (const HASH *table, const char *strkey)
+{
+  union hash_key key;
+  key.strkey = strkey;
+  return union_hash_find (table, key);
+}
+
+void *int_hash_find (const HASH *table, unsigned int intkey)
+{
+  union hash_key key;
+  key.intkey = intkey;
+  return union_hash_find (table, key);
+}
+
+struct hash_elem *hash_find_bucket (const HASH *table, const char *strkey)
+{
+  union hash_key key;
+  int hash;
+
+  if (!table)
+    return NULL;
+
+  key.strkey = strkey;
+  hash = table->gen_hash (key, table->nelem);
+  return table->table[hash];
+}
+
+static void union_hash_delete (HASH *table, union hash_key key, const void 
*data,
                       void (*destroy) (void *))
 {
-  struct hash_elem *ptr = table->table[hash];
-  struct hash_elem **last = &table->table[hash];
+  int hash;
+  struct hash_elem *ptr, **last;
 
-  while (ptr) 
+  if (!table)
+    return;
+
+  hash = table->gen_hash (key, table->nelem);
+  ptr = table->table[hash];
+  last = &table->table[hash];
+
+  while (ptr)
   {
     if ((data == ptr->data || !data)
-       && table->cmp_string (ptr->key, key) == 0)
+       && table->cmp_key (ptr->key, key) == 0)
     {
       *last = ptr->next;
       if (destroy)
        destroy (ptr->data);
       FREE (&ptr);
-      
+
       ptr = *last;
     }
     else
@@ -153,15 +248,35 @@
   }
 }
 
+void hash_delete (HASH *table, const char *strkey, const void *data,
+                  void (*destroy) (void *))
+{
+  union hash_key key;
+  key.strkey = strkey;
+  union_hash_delete (table, key, data, destroy);
+}
+
+void int_hash_delete (HASH *table, unsigned int intkey, const void *data,
+                  void (*destroy) (void *))
+{
+  union hash_key key;
+  key.intkey = intkey;
+  union_hash_delete (table, key, data, destroy);
+}
+
 /* ptr         pointer to the hash table to be freed
  * destroy()   function to call to free the ->data member (optional) 
  */
 void hash_destroy (HASH **ptr, void (*destroy) (void *))
 {
   int i;
-  HASH *pptr = *ptr;
+  HASH *pptr;
   struct hash_elem *elem, *tmp;
 
+  if (!ptr || !*ptr)
+    return;
+
+  pptr = *ptr;
   for (i = 0 ; i < pptr->nelem; i++)
   {
     for (elem = pptr->table[i]; elem; )
diff -r 4f0a84b954ef -r 7c0e7a0769e4 hash.h
--- a/hash.h    Wed Jan 04 19:45:59 2017 -0800
+++ b/hash.h    Fri Jan 06 14:37:48 2017 -0800
@@ -19,9 +19,15 @@
 #ifndef _HASH_H
 #define _HASH_H
 
+union hash_key
+{
+  const char *strkey;
+  unsigned int intkey;
+};
+
 struct hash_elem
 {
-  const char *key;
+  union hash_key key;
   void *data;
   struct hash_elem *next;
 };
@@ -30,20 +36,27 @@
 {
   int nelem;
   struct hash_elem **table;
-  unsigned int (*hash_string)(const unsigned char *, unsigned int);
-  int (*cmp_string)(const char *, const char *);
+  unsigned int (*gen_hash)(union hash_key, unsigned int);
+  int (*cmp_key)(union hash_key, union hash_key);
 }
 HASH;
 
-#define hash_find(table, key) hash_find_hash(table, table->hash_string 
((unsigned char *)key, table->nelem), key)
+HASH *hash_create (int nelem, int lower);
+HASH *int_hash_create (int nelem);
 
-#define hash_delete(table,key,data,destroy) hash_delete_hash(table, 
table->hash_string ((unsigned char *)key, table->nelem), key, data, destroy)
+int hash_insert (HASH * table, const char *key, void *data, int allow_dup);
+int int_hash_insert (HASH *table, unsigned int key, void *data, int allow_dup);
 
-HASH *hash_create (int nelem, int lower);
-int hash_insert (HASH * table, const char *key, void *data, int allow_dup);
-void *hash_find_hash (const HASH * table, int hash, const char *key);
-void hash_delete_hash (HASH * table, int hash, const char *key, const void 
*data,
-                      void (*destroy) (void *));
+void *hash_find (const HASH *table, const char *key);
+void *int_hash_find (const HASH *table, unsigned int key);
+
+struct hash_elem *hash_find_bucket (const HASH *table, const char *key);
+
+void hash_delete (HASH * table, const char *key, const void *data,
+                  void (*destroy) (void *));
+void int_hash_delete (HASH * table, unsigned int key, const void *data,
+                  void (*destroy) (void *));
+
 void hash_destroy (HASH ** hash, void (*destroy) (void *));
 
 #endif
diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/command.c
--- a/imap/command.c    Wed Jan 04 19:45:59 2017 -0800
+++ b/imap/command.c    Fri Jan 06 14:37:48 2017 -0800
@@ -855,36 +855,20 @@
   }
 }
 
-/* This should be optimised (eg with a tree or hash) */
-static int uid2msgno (IMAP_DATA* idata, unsigned int uid)
-{
-  int i;
-  
-  for (i = 0; i < idata->ctx->msgcount; i++)
-  {
-    HEADER* h = idata->ctx->hdrs[i];
-    if (HEADER_DATA(h)->uid == uid)
-      return i;
-  }
-  
-  return -1;
-}
-
 /* cmd_parse_search: store SEARCH response for later use */
 static void cmd_parse_search (IMAP_DATA* idata, const char* s)
 {
   unsigned int uid;
-  int msgno;
+  HEADER *h;
 
   dprint (2, (debugfile, "Handling SEARCH\n"));
 
   while ((s = imap_next_word ((char*)s)) && *s != '\0')
   {
-    uid = atoi (s);
-    msgno = uid2msgno (idata, uid);
-    
-    if (msgno >= 0)
-      idata->ctx->hdrs[msgno]->matched = 1;
+    uid = (unsigned int)atoi (s);
+    h = (HEADER *)int_hash_find (idata->uid_hash, uid);
+    if (h)
+      h->matched = 1;
   }
 }
 
diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/imap.c
--- a/imap/imap.c       Wed Jan 04 19:45:59 2017 -0800
+++ b/imap/imap.c       Fri Jan 06 14:37:48 2017 -0800
@@ -280,6 +280,8 @@
        FREE (&idata->cache[cacheno].path);
       }
 
+      int_hash_delete (idata->uid_hash, HEADER_DATA(h)->uid, h, NULL);
+
       imap_free_header_data ((IMAP_HEADER_DATA**)&h->data);
     }
   }
@@ -1392,6 +1394,7 @@
     /* mailbox may not have fully loaded */
     if (ctx->hdrs[i] && ctx->hdrs[i]->data)
       imap_free_header_data ((IMAP_HEADER_DATA**)&(ctx->hdrs[i]->data));
+  hash_destroy (&idata->uid_hash, NULL);
 
   for (i = 0; i < IMAP_CACHE_LEN; i++)
   {
diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/imap_private.h
--- a/imap/imap_private.h       Wed Jan 04 19:45:59 2017 -0800
+++ b/imap/imap_private.h       Fri Jan 06 14:37:48 2017 -0800
@@ -214,6 +214,7 @@
   unsigned char reopen;
   unsigned int newMailCount;
   IMAP_CACHE cache[IMAP_CACHE_LEN];
+  HASH *uid_hash;
   unsigned int uid_validity;
   unsigned int uidnext;
   body_cache_t *bcache;
diff -r 4f0a84b954ef -r 7c0e7a0769e4 imap/message.c
--- a/imap/message.c    Wed Jan 04 19:45:59 2017 -0800
+++ b/imap/message.c    Fri Jan 06 14:37:48 2017 -0800
@@ -52,6 +52,23 @@
 static int msg_parse_fetch (IMAP_HEADER* h, char* s);
 static char* msg_parse_flags (IMAP_HEADER* h, char* s);
 
+static void imap_update_context (IMAP_DATA *idata, int oldmsgcount)
+{
+  CONTEXT *ctx;
+  HEADER *h;
+  int msgno;
+
+  ctx = idata->ctx;
+  if (!idata->uid_hash)
+    idata->uid_hash = int_hash_create (MAX (6 * ctx->msgcount / 5, 30));
+
+  for (msgno = oldmsgcount; msgno < ctx->msgcount; msgno++)
+  {
+    h = ctx->hdrs[msgno];
+    int_hash_insert (idata->uid_hash, HEADER_DATA(h)->uid, h, 0);
+  }
+}
+
 /* imap_read_headers:
  * Changed to read many headers instead of just one. It will return the
  * msgno of the last message read. It will return a value other than
@@ -377,6 +394,7 @@
   {
     mx_alloc_memory(ctx);
     mx_update_context (ctx, ctx->msgcount - oldmsgcount);
+    imap_update_context (idata, oldmsgcount);
   }
 
   idata->reopen |= IMAP_REOPEN_ALLOW;
diff -r 4f0a84b954ef -r 7c0e7a0769e4 thread.c
--- a/thread.c  Wed Jan 04 19:45:59 2017 -0800
+++ b/thread.c  Fri Jan 06 14:37:48 2017 -0800
@@ -416,7 +416,6 @@
 {
   struct hash_elem *ptr;
   THREAD *tmp, *last = NULL;
-  unsigned int hash;
   LIST *subjects = NULL, *oldlist;
   time_t date = 0;  
 
@@ -424,9 +423,7 @@
 
   while (subjects)
   {
-    hash = ctx->subj_hash->hash_string ((unsigned char *) subjects->data,
-                                       ctx->subj_hash->nelem);
-    for (ptr = ctx->subj_hash->table[hash]; ptr; ptr = ptr->next)
+    for (ptr = hash_find_bucket (ctx->subj_hash, subjects->data); ptr; ptr = 
ptr->next)
     {
       tmp = ((HEADER *) ptr->data)->thread;
       if (tmp != cur &&                           /* don't match the same 
message */

Reply via email to