The two Gnulib hash table modules 'hash' and 'hash-map' are not applicable
in many situations, because they require the key to be a pointer. While
a 'void *' pointer is a nice object-oriented abstraction, it cannot
easily be used (without extra memory allocation) when the key is merely
a chunk of memory.

GNU gettext has a module made for this type of key. It has been in use for
at least 25 years, and is used in many places (finding duplicate msgids,
mapping identifiers to prototype information, managing ITS rule classes,
mapping CSS classes to terminal attributes, etc.). This gives me
confidence that the API is a usable one, that will not need to change
significantly.

Therefore I'm moving this module to Gnulib now.

The second patch renames an internal function of the 'hash' module, so
as to avoid having two functions with the name 'hash_find_entry' in Gnulib.
(This would not be a conflict, just potentially confusing.)


2025-04-29  Bruno Haible  <br...@clisp.org>

        hash: Rename an internal function.
        * lib/hash.c (find_entry): Rename from hash_find_entry.

2025-04-29  Bruno Haible  <br...@clisp.org>

        New module mem-hash-map.
        * lib/mem-hash-map.h: New file, from GNU gettext.
        * lib/mem-hash-map.c: New file, from GNU gettext.
        * modules/mem-hash-map: New file, from GNU gettext.

>From 5a842672e79a7a5f6be837c483be4f9901a4ecc0 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Wed, 30 Apr 2025 03:19:10 +0200
Subject: [PATCH 1/2] New module mem-hash-map.

* lib/mem-hash-map.h: New file, from GNU gettext.
* lib/mem-hash-map.c: New file, from GNU gettext.
* modules/mem-hash-map: New file, from GNU gettext.
---
 ChangeLog            |   7 +
 lib/mem-hash-map.c   | 352 +++++++++++++++++++++++++++++++++++++++++++
 lib/mem-hash-map.h   |  90 +++++++++++
 modules/mem-hash-map |  25 +++
 4 files changed, 474 insertions(+)
 create mode 100644 lib/mem-hash-map.c
 create mode 100644 lib/mem-hash-map.h
 create mode 100644 modules/mem-hash-map

diff --git a/ChangeLog b/ChangeLog
index ee6295754d..fc9bb141c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2025-04-29  Bruno Haible  <br...@clisp.org>
+
+	New module mem-hash-map.
+	* lib/mem-hash-map.h: New file, from GNU gettext.
+	* lib/mem-hash-map.c: New file, from GNU gettext.
+	* modules/mem-hash-map: New file, from GNU gettext.
+
 2025-04-29  Bruno Haible  <br...@clisp.org>
 
 	next-prime: Add tests.
diff --git a/lib/mem-hash-map.c b/lib/mem-hash-map.c
new file mode 100644
index 0000000000..1cd4f436cc
--- /dev/null
+++ b/lib/mem-hash-map.c
@@ -0,0 +1,352 @@
+/* Simple hash table (no removals) where the keys are memory blocks.
+   Copyright (C) 1994-2025 Free Software Foundation, Inc.
+   Written by Ulrich Drepper <drep...@gnu.ai.mit.edu>, October 1994.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published
+   by the Free Software Foundation, either version 3 of the License,
+   or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "mem-hash-map.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <limits.h>
+#include <sys/types.h>
+
+#include "next-prime.h"
+
+/* Since this simple implementation of hash tables allows only insertion, no
+   removal of entries, the right data structure for the memory holding all keys
+   is an obstack.  */
+#include "obstack.h"
+
+/* Use checked memory allocation.  */
+#include "xalloc.h"
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+
+typedef struct hash_entry
+{
+  size_t used;         /* Hash code of the key, or 0 for an unused entry.  */
+  const void *key;     /* Start of the key's bytes.  */
+  size_t keylen;       /* Length of the key, in bytes.  */
+  void *data;          /* Value associated with the key.  */
+  struct hash_entry *next; /* Next entry in the circular list.  */
+}
+hash_entry;
+
+
+/* Initialize a hash table.  INIT_SIZE > 1 is the initial number of available
+   entries; it is first replaced by next_prime (INIT_SIZE).
+   Return 0 always.  */
+int
+hash_init (hash_table *htab, size_t init_size)
+{
+  /* We need the size to be a prime.  */
+  init_size = next_prime (init_size);
+
+  /* Initialize the data structure.  Slot 0 of the table is never used.  */
+  htab->size = init_size;
+  htab->filled = 0;
+  htab->first = NULL;
+  htab->table = XCALLOC (init_size + 1, hash_entry);
+
+  obstack_init (&htab->mem_pool);  /* Will hold the copies of the keys.  */
+
+  return 0;
+}
+
+
+/* Delete a hash table's contents.  HTAB itself is not freed.
+   Return 0 always.  */
+int
+hash_destroy (hash_table *htab)
+{
+  free (htab->table);                    /* The array of entries.  */
+  obstack_free (&htab->mem_pool, NULL);  /* The pool with the key copies.  */
+  return 0;
+}
+
+
+/* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY
+   in memory.  The result is never 0, since 0 marks an unused table entry.  */
+static size_t
+compute_hashval (const void *key, size_t keylen)
+{
+  size_t cnt;
+  size_t hval;
+
+  /* Rotate-and-add over all bytes, seeded with KEYLEN.  The algorithm is
+     taken from [Aho,Sethi,Ullman], fixed according to
+     https://haible.de/bruno/hashfunc.html.  */
+  cnt = 0;
+  hval = keylen;
+  while (cnt < keylen)
+    {
+      hval = (hval << 9) | (hval >> (sizeof (size_t) * CHAR_BIT - 9));
+      hval += (size_t) *(((const char *) key) + cnt++);
+    }
+  return hval != 0 ? hval : ~((size_t) 0);  /* Map 0 to all-ones.  */
+}
+
+
+/* References:
+   [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
+   [Knuth]            The Art of Computer Programming, part3 (6.4) */
+
+/* Look up a given key in the hash table.  HVAL is the key's hash code.
+   Return the index of the entry, if present, or otherwise the index of a free
+   entry where it could be inserted.  */
+static size_t
+lookup (const hash_table *htab,
+        const void *key, size_t keylen,
+        size_t hval)
+{
+  size_t hash;
+  size_t idx;
+  hash_entry *table = htab->table;
+
+  /* First hash function: simply take the modulus but prevent zero.  */
+  hash = 1 + hval % htab->size;
+
+  idx = hash;
+
+  if (table[idx].used)
+    {
+      if (table[idx].used == hval && table[idx].keylen == keylen
+          && memcmp (table[idx].key, key, keylen) == 0)
+        return idx;
+
+      /* Second hash function as suggested in [Knuth]: the probing step.  */
+      hash = 1 + hval % (htab->size - 2);
+
+      do
+        {
+          if (idx <= hash)
+            idx = htab->size + idx - hash;  /* Wrap around, skipping 0.  */
+          else
+            idx -= hash;
+
+          /* If entry is found use it.  */
+          if (table[idx].used == hval && table[idx].keylen == keylen
+              && memcmp (table[idx].key, key, keylen) == 0)
+            return idx;
+        }
+      while (table[idx].used);  /* Stop at the first unused entry.  */
+    }
+  return idx;
+}
+
+
+/* Look up the value of the key KEY[0..KEYLEN-1] in the given table.
+   If found, return 0 and set *RESULT to it.  Otherwise return -1.  */
+int
+hash_find_entry (const hash_table *htab, const void *key, size_t keylen,
+                 void **result)
+{
+  hash_entry *table = htab->table;
+  size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen));
+
+  if (table[idx].used == 0)
+    return -1;  /* Not found; *RESULT is left untouched.  */
+
+  *result = table[idx].data;
+  return 0;
+}
+
+
+/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX.
+   HVAL is the key's hash code.  IDX depends on it.  The table entry at index
+   IDX is known to be unused.  The KEY pointer is stored, not copied.  */
+static void
+insert_entry_2 (hash_table *htab,
+                const void *key, size_t keylen,
+                size_t hval, size_t idx, void *data)
+{
+  hash_entry *table = htab->table;
+
+  table[idx].used = hval;
+  table[idx].key = key;
+  table[idx].keylen = keylen;
+  table[idx].data = data;
+
+  /* Append the entry to the circular list; htab->first is the newest one.  */
+  if (htab->first == NULL)
+    {
+      table[idx].next = &table[idx];  /* Single-element circle.  */
+      htab->first = &table[idx];
+    }
+  else
+    {
+      table[idx].next = htab->first->next;  /* Link to the oldest entry.  */
+      htab->first->next = &table[idx];
+      htab->first = &table[idx];
+    }
+
+  ++htab->filled;
+}
+
+
+/* Grow the table to next_prime (2 * size) slots; reinsert all entries.  */
+static void
+resize (hash_table *htab)
+{
+  size_t old_size = htab->size;
+  hash_entry *table = htab->table;  /* The old array.  */
+  size_t idx;
+
+  htab->size = next_prime (htab->size * 2);
+  htab->filled = 0;
+  htab->first = NULL;
+  htab->table = XCALLOC (1 + htab->size, hash_entry);
+
+  for (idx = 1; idx <= old_size; ++idx)  /* Used slots are 1..old_size.  */
+    if (table[idx].used)
+      insert_entry_2 (htab, table[idx].key, table[idx].keylen,
+                      table[idx].used,  /* Reuse the stored hash code.  */
+                      lookup (htab, table[idx].key, table[idx].keylen,
+                              table[idx].used),
+                      table[idx].data);
+
+  free (table);  /* Only the old array; the key pointers were carried over.  */
+}
+
+
+/* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
+   Return non-NULL (more precisely, the address of the KEY inside the table's
+   memory pool) if successful, or NULL if there is already an entry with the
+   given key.  In the latter case the table is left unchanged.  */
+const void *
+hash_insert_entry (hash_table *htab,
+                   const void *key, size_t keylen,
+                   void *data)
+{
+  size_t hval = compute_hashval (key, keylen);
+  hash_entry *table = htab->table;
+  size_t idx = lookup (htab, key, keylen, hval);
+
+  if (table[idx].used)
+    /* We don't want to overwrite the old value.  */
+    return NULL;
+  else
+    {
+      /* An empty bucket has been found.  Store a private copy of the key.  */
+      void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
+      insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
+      if (100 * htab->filled > 75 * htab->size)
+        /* Table is filled more than 75%.  Resize the table.  */
+        resize (htab);
+      return keycopy;
+    }
+}
+
+
+/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table, replacing the
+   value of an already existing entry with the same key.  Return 0.  */
+int
+hash_set_value (hash_table *htab,
+                const void *key, size_t keylen,
+                void *data)
+{
+  size_t hval = compute_hashval (key, keylen);
+  hash_entry *table = htab->table;
+  size_t idx = lookup (htab, key, keylen, hval);
+
+  if (table[idx].used)
+    {
+      /* Overwrite the old value.  The stored key is kept as is.  */
+      table[idx].data = data;
+      return 0;
+    }
+  else
+    {
+      /* An empty bucket has been found.  Store a private copy of the key.  */
+      void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
+      insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
+      if (100 * htab->filled > 75 * htab->size)
+        /* Table is filled more than 75%.  Resize the table.  */
+        resize (htab);
+      return 0;
+    }
+}
+
+
+/* Step *PTR forward to the next used entry in the given hash table.  *PTR
+   should be initially set to NULL.  Store information about the next entry
+   in *KEY, *KEYLEN, *DATA.
+   Return 0 normally, -1 when the whole hash table has been traversed.  */
+int
+hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen,
+              void **data)
+{
+  hash_entry *curr;
+
+  if (*ptr == NULL)  /* Start of the traversal.  */
+    {
+      if (htab->first == NULL)
+        return -1;  /* The table is empty.  */
+      curr = htab->first;
+    }
+  else
+    {
+      if (*ptr == htab->first)
+        return -1;  /* The previous call returned the last entry.  */
+      curr = (hash_entry *) *ptr;
+    }
+  curr = curr->next;
+  *ptr = (void *) curr;  /* Remember the position for the next call.  */
+
+  *key = curr->key;
+  *keylen = curr->keylen;
+  *data = curr->data;
+  return 0;
+}
+
+
+/* Step *PTR forward to the next used entry in the given hash table.  *PTR
+   should be initially set to NULL.  Store information about the next entry
+   in *KEY, *KEYLEN, *DATAP.  *DATAP is set to point to the storage of the
+   value; modifying **DATAP will modify the value of the entry.
+   Return 0 normally, -1 when the whole hash table has been traversed.  */
+int
+hash_iterate_modify (hash_table *htab, void **ptr,
+                     const void **key, size_t *keylen,
+                     void ***datap)
+{
+  hash_entry *curr;
+
+  if (*ptr == NULL)  /* Start of the traversal.  */
+    {
+      if (htab->first == NULL)
+        return -1;  /* The table is empty.  */
+      curr = htab->first;
+    }
+  else
+    {
+      if (*ptr == htab->first)
+        return -1;  /* The previous call returned the last entry.  */
+      curr = (hash_entry *) *ptr;
+    }
+  curr = curr->next;
+  *ptr = (void *) curr;  /* Remember the position for the next call.  */
+
+  *key = curr->key;
+  *keylen = curr->keylen;
+  *datap = &curr->data;  /* Address of the value slot inside the table.  */
+  return 0;
+}
diff --git a/lib/mem-hash-map.h b/lib/mem-hash-map.h
new file mode 100644
index 0000000000..abed2017a5
--- /dev/null
+++ b/lib/mem-hash-map.h
@@ -0,0 +1,90 @@
+/* Simple hash table (no removals) where the keys are memory blocks.
+   Copyright (C) 1995-2025 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published
+   by the Free Software Foundation, either version 3 of the License,
+   or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#ifndef _GL_MEM_HASH_MAP_H
+#define _GL_MEM_HASH_MAP_H
+
+#include <stddef.h>
+
+#include "obstack.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hash_entry;
+
+typedef struct hash_table
+{
+  size_t size;              /* Number of usable entries (slots 1..size).  */
+  size_t filled;            /* Number of used entries.  */
+  struct hash_entry *first; /* Newest entry of the circular list, or NULL.  */
+  struct hash_entry *table; /* Pointer to array of entries.  */
+  struct obstack mem_pool;  /* Memory pool holding the keys.  */
+}
+hash_table;
+
+/* Initialize a hash table.  INIT_SIZE > 1 is the initial number of available
+   entries.
+   Return 0 always.  */
+extern int hash_init (hash_table *htab, size_t init_size);
+
+/* Delete a hash table's contents.
+   Return 0 always.  */
+extern int hash_destroy (hash_table *htab);
+
+/* Look up the value of a key in the given table.
+   If found, return 0 and set *RESULT to it.  Otherwise return -1.  */
+extern int hash_find_entry (const hash_table *htab,
+                            const void *key, size_t keylen,
+                            void **result);
+
+/* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
+   Return non-NULL (more precisely, the address of the KEY inside the table's
+   memory pool) if successful, or NULL if there is already an entry with the
+   given key.  */
+extern const void * hash_insert_entry (hash_table *htab,
+                                       const void *key, size_t keylen,
+                                       void *data);
+
+/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
+   Return 0.  */
+extern int hash_set_value (hash_table *htab,
+                           const void *key, size_t keylen,
+                           void *data);
+
+/* Steps *PTR forward to the next used entry in the given hash table.  *PTR
+   should be initially set to NULL.  Store information about the next entry
+   in *KEY, *KEYLEN, *DATA.
+   Return 0 normally, -1 when the whole hash table has been traversed.  */
+extern int hash_iterate (hash_table *htab, void **ptr,
+                         const void **key, size_t *keylen,
+                         void **data);
+
+/* Steps *PTR forward to the next used entry in the given hash table.  *PTR
+   should be initially set to NULL.  Store information about the next entry
+   in *KEY, *KEYLEN, *DATAP.  *DATAP is set to point to the storage of the
+   value; modifying **DATAP will modify the value of the entry.
+   Return 0 normally, -1 when the whole hash table has been traversed.  */
+extern int hash_iterate_modify (hash_table *htab, void **ptr,
+                                const void **key, size_t *keylen,
+                                void ***datap);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* not _GL_MEM_HASH_MAP_H */
diff --git a/modules/mem-hash-map b/modules/mem-hash-map
new file mode 100644
index 0000000000..9a97e95dff
--- /dev/null
+++ b/modules/mem-hash-map
@@ -0,0 +1,25 @@
+Description:
+Simple hash table (no removals) where the keys are memory blocks.
+
+Files:
+lib/mem-hash-map.h
+lib/mem-hash-map.c
+
+Depends-on:
+next-prime
+obstack
+xalloc
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += mem-hash-map.h mem-hash-map.c
+
+Include:
+"mem-hash-map.h"
+
+License:
+GPL
+
+Maintainer:
+Bruno Haible
-- 
2.43.0

>From 814e8ce9f21b2071375fca46c96ad89ab8855811 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Wed, 30 Apr 2025 03:31:00 +0200
Subject: [PATCH 2/2] hash: Rename an internal function.

* lib/hash.c (find_entry): Rename from hash_find_entry.
---
 ChangeLog  |  5 +++++
 lib/hash.c | 12 ++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index fc9bb141c8..2d0f93e33d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2025-04-29  Bruno Haible  <br...@clisp.org>
+
+	hash: Rename an internal function.
+	* lib/hash.c (find_entry): Rename from hash_find_entry.
+
 2025-04-29  Bruno Haible  <br...@clisp.org>
 
 	New module mem-hash-map.
diff --git a/lib/hash.c b/lib/hash.c
index 74d016fde3..fd761730b1 100644
--- a/lib/hash.c
+++ b/lib/hash.c
@@ -668,8 +668,8 @@ free_entry (Hash_table *table, struct hash_entry *entry)
    the table, unlink the matching entry.  */
 
 static void *
-hash_find_entry (Hash_table *table, const void *entry,
-                 struct hash_entry **bucket_head, bool delete)
+find_entry (Hash_table *table, const void *entry,
+            struct hash_entry **bucket_head, bool delete)
 {
   struct hash_entry *bucket = safe_hasher (table, entry);
   struct hash_entry *cursor;
@@ -901,13 +901,13 @@ hash_insert_if_absent (Hash_table *table, void const *entry,
   struct hash_entry *bucket;
 
   /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
-     to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
+     to indicate "not found", and find_entry uses "bucket->data == NULL"
      to indicate an empty bucket.  */
   if (! entry)
     abort ();
 
   /* If there's a matching entry already in the table, return that.  */
-  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
+  if ((data = find_entry (table, entry, &bucket, false)) != NULL)
     {
       if (matched_ent)
         *matched_ent = data;
@@ -946,7 +946,7 @@ hash_insert_if_absent (Hash_table *table, void const *entry,
             return -1;
 
           /* Update the bucket we are interested in.  */
-          if (hash_find_entry (table, entry, &bucket, false) != NULL)
+          if (find_entry (table, entry, &bucket, false) != NULL)
             abort ();
         }
     }
@@ -994,7 +994,7 @@ hash_remove (Hash_table *table, const void *entry)
   void *data;
   struct hash_entry *bucket;
 
-  data = hash_find_entry (table, entry, &bucket, true);
+  data = find_entry (table, entry, &bucket, true);
   if (!data)
     return NULL;
 
-- 
2.43.0

Reply via email to