Hi, This is a schoolbook case about API design. The function mem_cd_iconv returns a block of memory and its length. There are 3 conventions of doing so, known from glibc:
int foo (..., &result, &result_length); a) (like vasprintf:) Allocate a fresh block of memory always. b) (like vasnprintf:) Allocate a fresh block of memory if and only if it is larger than the previous block of memory passed in by the caller. When a fresh block is needed, don't realloc the previous block of memory. c) (like getline:) Allocate a fresh block of memory if and only if it is larger than the previous block of memory passed in by the caller. When a fresh block is needed, use realloc. The convention for mem_cd_iconv is like this: *RESULTP should initially contain NULL or a malloced memory block. May change the size of the allocated memory block in *RESULTP, storing its new address in *RESULTP and its new length in *LENGTHP. Return value: 0 if successful, otherwise -1 and errno set. If successful, the resulting string is stored in *RESULTP and its length in *LENGTHP. It was apparently designed with these principles in mind: - (a) is inefficient, there must be a way to reuse the previous block. - (b) is inefficient: new=realloc(old,n) is more efficient than new=malloc(n), free(old); - It is good to shrink the result, so it occupies the minimum amount of memory. The current convention for mem_cd_iconv thus is: d) Allocate a fresh block of memory if and only if no previous block was passed in by the caller. When a fresh block is needed, use realloc; otherwise use realloc to shrink to the minimum size. No need to pass the old length. And it is useless. It becomes clear when we look at the situation in which the function can be used. (a) is problematic when the function is quick - the malloc time can become significative. (b) is useful to avoid a memory allocation entirely - by passing a static or stack-allocated buffer. (c) is useful in a loop, that calls the same function over and over again. The usecase of a loop can also use the API of (b) - and use free(old) after a new line was allocated. And (d) is not useful at all. So I'm changing mem_cd_iconv to use convention (b). Bruno 2007-01-21 Bruno Haible <[EMAIL PROTECTED]> * lib/striconv.h (mem_cd_iconv): Change specification. * lib/striconv.c (mem_cd_iconv): Don't free the user-supplied original result buffer. (str_cd_iconv): Update. * tests/test-striconv.c (main): Update. *** lib/striconv.h 6 Sep 2006 12:21:39 -0000 1.1 --- lib/striconv.h 21 Jan 2007 20:33:40 -0000 *************** *** 1,5 **** /* Charset conversion. ! Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software; you can redistribute it and/or modify --- 1,5 ---- /* Charset conversion. ! Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software; you can redistribute it and/or modify *************** *** 35,46 **** /* Convert an entire string from one encoding to another, using iconv. The original string is at [SRC,...,SRC+SRCLEN-1]. The conversion descriptor is passed as CD. ! *RESULTP should initially contain NULL or a malloced memory block. ! May change the size of the allocated memory block in *RESULTP, storing ! its new address in *RESULTP and its new length in *LENGTHP. Return value: 0 if successful, otherwise -1 and errno set. ! If successful, the resulting string is stored in *RESULTP and its length ! in *LENGTHP. */ extern int mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, char **resultp, size_t *lengthp); --- 35,47 ---- /* Convert an entire string from one encoding to another, using iconv. The original string is at [SRC,...,SRC+SRCLEN-1]. The conversion descriptor is passed as CD. ! *RESULTP and *LENGTH should initially be a scratch buffer and its size, ! or *RESULTP can initially be NULL. ! May erase the contents of the memory at *RESULTP. Return value: 0 if successful, otherwise -1 and errno set. ! If successful: The resulting string is stored in *RESULTP and its length ! in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is ! unchanged if no dynamic memory allocation was necessary. */ extern int mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, char **resultp, size_t *lengthp); *** lib/striconv.c 16 Jan 2007 03:25:12 -0000 1.7 --- lib/striconv.c 21 Jan 2007 20:33:40 -0000 *************** *** 118,132 **** *lengthp = 0; return 0; } ! result = ! (char *) (*resultp != NULL ? realloc (*resultp, length) : malloc (length)); ! if (result == NULL) { ! errno = ENOMEM; ! return -1; } - *resultp = result; - *lengthp = length; /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ --- 118,134 ---- *lengthp = 0; return 0; } ! if (*resultp != NULL && *lengthp >= length) ! result = *resultp; ! else { ! result = (char *) malloc (length); ! if (result == NULL) ! { ! errno = ENOMEM; ! return -1; ! } } /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ *************** *** 153,159 **** if (errno == EINVAL) break; else ! return -1; } # if !defined _LIBICONV_VERSION && !defined __GLIBC__ /* Irix iconv() inserts a NUL byte if it cannot convert. --- 155,161 ---- if (errno == EINVAL) break; else ! goto fail; } # if !defined _LIBICONV_VERSION && !defined __GLIBC__ /* Irix iconv() inserts a NUL byte if it cannot convert. *************** *** 163,169 **** else if (res > 0) { errno = EILSEQ; ! return -1; } # endif } --- 165,171 ---- else if (res > 0) { errno = EILSEQ; ! goto fail; } # endif } *************** *** 174,187 **** size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); if (res == (size_t)(-1)) ! return -1; } # endif if (outsize != 0) abort (); } return 0; # undef tmpbufsize } --- 176,203 ---- size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); if (res == (size_t)(-1)) ! goto fail; } # endif if (outsize != 0) abort (); } + *resultp = result; + *lengthp = length; + return 0; + + fail: + { + if (result != *resultp) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } # undef tmpbufsize } *************** *** 202,219 **** Therefore we cannot use the second, faster algorithm. */ char *result = NULL; ! size_t length; int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); char *final_result; if (retval < 0) { if (result != NULL) ! { ! int saved_errno = errno; ! free (result); ! errno = saved_errno; ! } return NULL; } --- 218,231 ---- Therefore we cannot use the second, faster algorithm. */ char *result = NULL; ! size_t length = 0; int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); char *final_result; if (retval < 0) { if (result != NULL) ! abort (); return NULL; } *** tests/test-striconv.c 14 Jan 2007 23:38:54 -0000 1.1 --- tests/test-striconv.c 21 Jan 2007 20:33:42 -0000 *************** *** 52,58 **** static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; char *result = NULL; ! size_t length; int retval = mem_cd_iconv (input, strlen (input), cd_88591_to_utf8, &result, &length); ASSERT (retval == 0); --- 52,58 ---- static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; char *result = NULL; ! size_t length = 0; int retval = mem_cd_iconv (input, strlen (input), cd_88591_to_utf8, &result, &length); ASSERT (retval == 0); *************** *** 66,72 **** static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; char *result = NULL; ! size_t length; int retval = mem_cd_iconv (input, strlen (input), cd_utf8_to_88591, &result, &length); ASSERT (retval == 0); --- 66,72 ---- static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; char *result = NULL; ! size_t length = 0; int retval = mem_cd_iconv (input, strlen (input), cd_utf8_to_88591, &result, &length); ASSERT (retval == 0); *************** *** 79,85 **** { static const char input[] = "\342\202\254"; /* EURO SIGN */ char *result = NULL; ! size_t length; int retval = mem_cd_iconv (input, strlen (input), cd_utf8_to_88591, &result, &length); ASSERT (retval == -1 && errno == EILSEQ); --- 79,85 ---- { static const char input[] = "\342\202\254"; /* EURO SIGN */ char *result = NULL; ! size_t length = 0; int retval = mem_cd_iconv (input, strlen (input), cd_utf8_to_88591, &result, &length); ASSERT (retval == -1 && errno == EILSEQ); *************** *** 90,96 **** { static const char input[] = "\342"; char *result = NULL; ! size_t length; int retval = mem_cd_iconv (input, strlen (input), cd_utf8_to_88591, &result, &length); ASSERT (retval == 0); --- 90,96 ---- { static const char input[] = "\342"; char *result = NULL; ! size_t length = 0; int retval = mem_cd_iconv (input, strlen (input), cd_utf8_to_88591, &result, &length); ASSERT (retval == 0);