Here is another patch which changes linear search to bsearch.

With the DARWIN7 set of charset aliases, I get a tenfold reduction
in benchmark execution time.

This patch should be thread-safe; in the worst case it would leak
some memory _once_ (just as before).

-- 
   Alexander.
diff --git a/lib/localcharset.c b/lib/localcharset.c
index 38e36ef..55cc360 100644
--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -109,19 +109,31 @@
 #if __STDC__ != 1
 # define volatile /* empty */
 #endif
-/* Pointer to the contents of the charset.alias file, if it has already been
-   read, else NULL.  Its format is:
-   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
-static const char * volatile charset_aliases;
+/* Pointer to the ordered array of charset aliases read.
+   Format of a single alias is: ALIAS '\0' CANONICAL '\0' */
+static const char ** volatile charset_aliases;
+/* Aliases count, -1 until charset aliases are read. */
+static int volatile charset_aliases_count = -1;
+/* fallback charset name, "*" entry from aliases */
+static const char * volatile charset_fallback;
+
+static int
+charset_alias_cmp(const void *a,const void *b)
+{
+  return strcmp (*(const char**)a, *(const char**)b);
+}
 
-/* Return a pointer to the contents of the charset.alias file.  */
-static const char *
+/* Reads the contents of the charset.alias file.  */
+static void
 get_charset_aliases (void)
 {
   const char *cp;
+  const char **cps = 0;
+  int cnt = 0;
+  const char *fb = 0;
+  const char *aliases;
 
-  cp = charset_aliases;
-  if (cp == NULL)
+  if (charset_aliases_count < 0)
     {
 #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
       const char *dir;
@@ -339,10 +351,67 @@ get_charset_aliases (void)
 # endif
 #endif
 
-      charset_aliases = cp;
+      /* build ordered array of aliases and also find fallback */
+      cnt = 0;
+      for (aliases = cp; *aliases != '\0';
+          aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+       {
+         if (aliases[0] == '*' && aliases[1] == '\0')
+           fb = aliases + strlen (aliases) + 1;
+         else
+           cnt ++;
+       }
+
+      if (cnt > 0)
+       {
+          cps = (const char **) malloc (cnt * sizeof(*cps));
+         if (cps == NULL)
+           {
+             /* out of memory */
+             charset_aliases_count = 0;
+             return;
+           }
+    
+          cnt = 0;
+          for (aliases = cp; *aliases != '\0';
+               aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+           {
+              if (!(aliases[0] == '*' && aliases[1] == '\0'))
+               cps[cnt++] = aliases;
+           }
+    
+          qsort (cps, cnt, sizeof(*cps), charset_alias_cmp);
+       }
+
+      charset_aliases = cps;
+      charset_fallback = fb;
+      charset_aliases_count = cnt;
     }
+}
+
+/* Replace codeset name using the alias table.
+   The result must not be freed; it is statically allocated.
+*/
+
+static const char *
+resolve_alias (const char *codeset)
+{
+  const char **alias_ptr = 0;
+
+  get_charset_aliases ();
+  
+  if (charset_aliases_count > 0)
+    alias_ptr = bsearch (&codeset, charset_aliases,
+                        charset_aliases_count, sizeof(codeset),
+                        charset_alias_cmp);
+  
+  if (alias_ptr != NULL)
+    return *alias_ptr + strlen (*alias_ptr) + 1;
+
+  if (charset_fallback != NULL)
+    return charset_fallback;
 
-  return cp;
+  return codeset;
 }
 
 /* Determine the current locale's character encoding, and canonicalize it
@@ -358,7 +427,6 @@ const char *
 locale_charset (void)
 {
   const char *codeset;
-  const char *aliases;
 
 #if !(defined WIN32_NATIVE || defined OS2)
 
@@ -526,16 +594,7 @@ locale_charset (void)
     /* The canonical name cannot be determined.  */
     codeset = "";
 
-  /* Resolve alias. */
-  for (aliases = get_charset_aliases ();
-       *aliases != '\0';
-       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
-    if (strcmp (codeset, aliases) == 0
-        || (aliases[0] == '*' && aliases[1] == '\0'))
-      {
-        codeset = aliases + strlen (aliases) + 1;
-        break;
-      }
+  codeset = resolve_alias (codeset);
 
   /* Don't return an empty string.  GNU libc and GNU libiconv interpret
      the empty string as denoting "the locale's character encoding",

Reply via email to