This adds one more option to the string iconv modules: support for transliteration, as implemented in glibc and GNU libiconv.
2007-01-23 Bruno Haible <[EMAIL PROTECTED]> * lib/striconveha.h: Include <stdbool.h>. (mem_iconveha, str_iconveha): Add 'transliterate' argument. * lib/striconveha.c: Include allocsa.h, strdup.h, c-strcase.h. (mem_iconveha_notranslit): Renamed from mem_iconveha. (mem_iconveha): New function. (str_iconveha_notranslit): Renamed from str_iconveha. (str_iconveha): New function. * modules/striconveha (Depends-on): Add stdbool, allocsa, strdup, c-strcase. *** lib/striconveha.h 23 Jan 2007 01:17:42 -0000 1.3 --- lib/striconveha.h 24 Jan 2007 00:49:48 -0000 *************** *** 19,24 **** --- 19,26 ---- #ifndef _STRICONVEHA_H #define _STRICONVEHA_H + #include <stdbool.h> + #include "striconveh.h" *************** *** 30,35 **** --- 32,40 ---- /* Convert an entire string from one encoding to another, using iconv. The original string is at [SRC,...,SRC+SRCLEN-1]. The "from" encoding can also be a name defined for autodetection. + If TRANSLITERATE is true, transliteration will be attempted to avoid conversion + errors, for iconv implementations that support this. Usually you'll choose + TRANSLITERATE = true if HANDLER != iconveh_error. If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this array is filled with offsets into the result, i.e. the character starting at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]], *************** *** 44,49 **** --- 49,55 ---- extern int mem_iconveha (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, + bool transliterate, enum iconv_ilseq_handler handler, size_t *offsets, char **resultp, size_t *lengthp); *************** *** 53,64 **** --- 59,74 ---- Both the "from" and the "to" encoding must use a single NUL byte at the end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). The "from" encoding can also be a name defined for autodetection. 
+ If TRANSLITERATE is true, transliteration will be attempted to avoid conversion + errors, for iconv implementations that support this. Usually you'll choose + TRANSLITERATE = true if HANDLER != iconveh_error. Allocate a malloced memory block for the result. Return value: the freshly allocated resulting NUL-terminated string if successful, otherwise NULL and errno set. */ extern char * str_iconveha (const char *src, const char *from_codeset, const char *to_codeset, + bool transliterate, enum iconv_ilseq_handler handler); *** lib/striconveha.c 24 Jan 2007 00:48:01 -0000 1.3 --- lib/striconveha.c 24 Jan 2007 00:49:48 -0000 *************** *** 25,30 **** --- 25,34 ---- #include <stdlib.h> #include <string.h> + #include "allocsa.h" + #include "strdup.h" + #include "c-strcase.h" + #define SIZEOF(a) (sizeof(a)/sizeof(a[0])) *************** *** 143,154 **** } } ! int ! mem_iconveha (const char *src, size_t srclen, ! const char *from_codeset, const char *to_codeset, ! enum iconv_ilseq_handler handler, ! size_t *offsets, ! char **resultp, size_t *lengthp) { int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler, offsets, resultp, lengthp); --- 147,159 ---- } } ! /* Like mem_iconveha, except no handling of transliteration. */ ! static int ! mem_iconveha_notranslit (const char *src, size_t srclen, ! const char *from_codeset, const char *to_codeset, ! enum iconv_ilseq_handler handler, ! size_t *offsets, ! char **resultp, size_t *lengthp) { int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler, offsets, resultp, lengthp); *************** *** 171,180 **** encodings = alias->encodings_to_try; do { ! retval = mem_iconveha (src, srclen, ! *encodings, to_codeset, ! iconveh_error, offsets, ! resultp, lengthp); if (!(retval < 0 && errno == EILSEQ)) return retval; encodings++; --- 176,185 ---- encodings = alias->encodings_to_try; do { ! retval = mem_iconveha_notranslit (src, srclen, ! *encodings, to_codeset, ! iconveh_error, offsets, ! 
resultp, lengthp); if (!(retval < 0 && errno == EILSEQ)) return retval; encodings++; *************** *** 185,194 **** encodings = alias->encodings_to_try; do { ! retval = mem_iconveha (src, srclen, ! *encodings, to_codeset, ! handler, offsets, ! resultp, lengthp); if (!(retval < 0 && errno == EILSEQ)) return retval; encodings++; --- 190,199 ---- encodings = alias->encodings_to_try; do { ! retval = mem_iconveha_notranslit (src, srclen, ! *encodings, to_codeset, ! handler, offsets, ! resultp, lengthp); if (!(retval < 0 && errno == EILSEQ)) return retval; encodings++; *************** *** 205,214 **** } } ! char * ! str_iconveha (const char *src, const char *from_codeset, const char *to_codeset, ! enum iconv_ilseq_handler handler) { char *result = str_iconveh (src, from_codeset, to_codeset, handler); --- 210,261 ---- } } ! int ! mem_iconveha (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, ! bool transliterate, ! enum iconv_ilseq_handler handler, ! size_t *offsets, ! char **resultp, size_t *lengthp) ! { ! if (srclen == 0) ! { ! /* Nothing to convert. */ ! *lengthp = 0; ! return 0; ! } ! ! /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5, ! we want to use transliteration. */ ! #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 ! if (transliterate) ! { ! int retval; ! size_t len = strlen (to_codeset); ! char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1); ! memcpy (to_codeset_suffixed, to_codeset, len); ! memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1); ! ! retval = mem_iconveha_notranslit (src, srclen, ! from_codeset, to_codeset_suffixed, ! handler, offsets, resultp, lengthp); ! ! freesa (to_codeset_suffixed); ! ! return retval; ! } ! else ! #endif ! return mem_iconveha_notranslit (src, srclen, ! from_codeset, to_codeset, ! handler, offsets, resultp, lengthp); ! } ! ! /* Like str_iconveha, except no handling of transliteration. */ ! static char * ! 
str_iconveha_notranslit (const char *src, ! const char *from_codeset, const char *to_codeset, ! enum iconv_ilseq_handler handler) { char *result = str_iconveh (src, from_codeset, to_codeset, handler); *************** *** 231,239 **** encodings = alias->encodings_to_try; do { ! result = str_iconveha (src, ! *encodings, to_codeset, ! iconveh_error); if (!(result == NULL && errno == EILSEQ)) return result; encodings++; --- 278,286 ---- encodings = alias->encodings_to_try; do { ! result = str_iconveha_notranslit (src, ! *encodings, to_codeset, ! iconveh_error); if (!(result == NULL && errno == EILSEQ)) return result; encodings++; *************** *** 244,252 **** encodings = alias->encodings_to_try; do { ! result = str_iconveha (src, ! *encodings, to_codeset, ! handler); if (!(result == NULL && errno == EILSEQ)) return result; encodings++; --- 291,299 ---- encodings = alias->encodings_to_try; do { ! result = str_iconveha_notranslit (src, ! *encodings, to_codeset, ! handler); if (!(result == NULL && errno == EILSEQ)) return result; encodings++; *************** *** 262,264 **** --- 309,349 ---- return NULL; } } + + char * + str_iconveha (const char *src, + const char *from_codeset, const char *to_codeset, + bool transliterate, + enum iconv_ilseq_handler handler) + { + if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) + { + char *result = strdup (src); + + if (result == NULL) + errno = ENOMEM; + return result; + } + + /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5, + we want to use transliteration. 
*/ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105 + if (transliterate) + { + char *result; + size_t len = strlen (to_codeset); + char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1); + memcpy (to_codeset_suffixed, to_codeset, len); + memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1); + + result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed, + handler); + + freesa (to_codeset_suffixed); + + return result; + } + else + #endif + return str_iconveha_notranslit (src, from_codeset, to_codeset, handler); + } *** modules/striconveha 21 Jan 2007 22:59:19 -0000 1.1 --- modules/striconveha 24 Jan 2007 00:49:49 -0000 *************** *** 7,13 **** --- 7,17 ---- lib/striconveha.c Depends-on: + stdbool striconveh + allocsa + strdup + c-strcase configure.ac: