Jean-Marc Lasgouttes wrote:
Does the behaviour depend on the current locale settings?
No, it doesn't seem so. The same thing happens under LC_ALL=C.
What architecture is it?
All are FreeBSD-6.2-RELEASE with current ports programs.
Can you reproduce it with a minimal C(++) file?
Yes, I tried, and there may be a problem with how the output of the
libiconv conversion is handled in src/support/unicode.C. Please check
whether this is correct, as I'm an amateur in this matter.
I wrote a small test program in C (test.c) and got the result shown in
result.txt (both attached). All conversions ISO-8859-1 <-> UCS etc. by
libiconv seem to work fine, but after the call the output is not found
where outbuf points; it is at the address outbuf pointed to before the
call (i.e. in test.c the array out_str holds the result, while printing
outbuf itself shows nothing).
Can the attached unicode.C.patch be a solution? With the patch, I can
compile lyx (but I got a crash in another place, sigh...). That is, in
unicode.C,
> int IconvProcessor::convert(char const * buf, size_t buflen,
> char * outbuf, size_t maxoutsize)
> {
> ...
> char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
> + char * outbuf2 = outbuf;
> size_t inbytesleft = buflen;
> size_t outbytesleft = maxoutsize;
>
> - int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
> + int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf2, &outbytesleft);
I added the lines marked with + in place of the lines marked with -.
Thanks,
Koji
// cc -liconv -I /usr/local/include -L /usr/local/lib test.c
#include <stdio.h>
#include <string.h>
#include <iconv.h>
/*
 * Round-trip the string "ABCDE" from ISO-8859-1 to each target encoding
 * and back again with iconv(), printing the strings that inbuf/outbuf
 * point at and the remaining byte counters before and after every call.
 *
 * Returns 0. A target encoding for which iconv_open() fails is reported
 * with perror() and skipped.
 */
int main(void)
{
    /* IN_LEN: bytes of input fed to the first conversion.
     * OUT_CAP: output space announced to iconv() for the first conversion. */
    enum { IN_LEN = 5, OUT_CAP = 50 };

    static char const fromcode[] = "ISO-8859-1";
    static char const * const tocode[] = {
        "UTF-8", "ISO-2022-JP", "CP932", "EUC-JP",
        "UCS-4LE", "UCS-4BE", "UCS-4"
    };
    size_t const ncodes = sizeof tocode / sizeof tocode[0];

    char in_str[22] = "ABCDE";
    /* One byte more than the capacity handed to iconv(), so the buffers
     * stay NUL-terminated for the "%s" prints even when completely filled. */
    char out_str[OUT_CAP + 1];
    char out_str2[OUT_CAP + 1];
    size_t i;

    for (i = 0; i < ncodes; i++) {
        /* inbuf is const-qualified as in the original test; NOTE(review):
         * some iconv() prototypes take a plain char ** here -- confirm
         * against the <iconv.h> in use. */
        char const * inbuf = in_str;
        char * outbuf = out_str;
        size_t inbytesleft = IN_LEN;
        size_t outbytesleft = OUT_CAP;
        size_t res;
        iconv_t cd;

        /* Clear the whole buffers so that no bytes from a previous, longer
         * conversion can show up in this iteration's output. */
        memset(out_str, 0, sizeof out_str);
        memset(out_str2, 0, sizeof out_str2);

        printf("\n***** Start conversion: %s -> %s -> %s *****\n",
               fromcode, tocode[i], fromcode);

        /* ---- 1st conversion: fromcode -> tocode[i] ---- */
        printf("***** before 1st conversion: %s -> %s *****\n", fromcode, tocode[i]);
        printf("inbuf = %s\t\toutbuf = %s\n", inbuf, outbuf);
        cd = iconv_open(tocode[i], fromcode);
        if (cd == (iconv_t)-1) {
            perror("iconv_open");
            continue;
        }
        res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
        iconv_close(cd);
        printf("***** after 1st conversion *****\n");
        /* iconv() advances inbuf/outbuf past the bytes it consumed/produced,
         * so the converted text is read through out_str, not outbuf. */
        printf("inbuf = %s\t\tinbytesleft = %zu\n", inbuf, inbytesleft);
        printf("outbuf = %s\t\toutbytesleft = %zu\n", outbuf, outbytesleft);
        /* Cast so that the (size_t)-1 error value prints as -1. */
        printf("out_str = %s\t\tres = %ld\n", out_str, (long)res);

        /* ---- 2nd conversion: tocode[i] -> fromcode ---- */
        inbuf = out_str;
        outbuf = out_str2;
        inbytesleft = OUT_CAP - outbytesleft;   /* bytes produced by step 1 */
        outbytesleft = IN_LEN;
        printf("***** before 2nd conversion: %s -> %s *****\n", tocode[i], fromcode);
        printf("inbuf = %s\t\toutbuf = %s\n", inbuf, outbuf);
        cd = iconv_open(fromcode, tocode[i]);
        if (cd == (iconv_t)-1) {
            perror("iconv_open");
            continue;
        }
        res = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
        iconv_close(cd);
        printf("***** after 2nd conversion *****\n");
        printf("inbuf = %s\t\tinbytesleft = %zu\n", inbuf, inbytesleft);
        printf("outbuf = %s\t\toutbytesleft = %zu\n", outbuf, outbytesleft);
        printf("out_str = %s\t\tres = %ld\n", out_str2, (long)res);
    }

    return 0;
}
***** Start conversion: ISO-8859-1 -> UTF-8 -> ISO-8859-1 *****
***** before 1st conversion: ISO-8859-1 -> UTF-8 *****
inbuf = ABCDE outbuf =
***** after 1st conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 45
out_str = ABCDE res = 0
***** before 2nd conversion: UTF-8 -> ISO-8859-1 *****
inbuf = ABCDE outbuf =
***** after 2nd conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 0
out_str = ABCDE res = 0
***** Start conversion: ISO-8859-1 -> ISO-2022-JP -> ISO-8859-1 *****
***** before 1st conversion: ISO-8859-1 -> ISO-2022-JP *****
inbuf = ABCDE outbuf =
***** after 1st conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 45
out_str = ABCDE res = 0
***** before 2nd conversion: ISO-2022-JP -> ISO-8859-1 *****
inbuf = ABCDE outbuf =
***** after 2nd conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 0
out_str = ABCDE res = 0
***** Start conversion: ISO-8859-1 -> CP932 -> ISO-8859-1 *****
***** before 1st conversion: ISO-8859-1 -> CP932 *****
inbuf = ABCDE outbuf =
***** after 1st conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 45
out_str = ABCDE res = 0
***** before 2nd conversion: CP932 -> ISO-8859-1 *****
inbuf = ABCDE outbuf =
***** after 2nd conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 0
out_str = ABCDE res = 0
***** Start conversion: ISO-8859-1 -> EUC-JP -> ISO-8859-1 *****
***** before 1st conversion: ISO-8859-1 -> EUC-JP *****
inbuf = ABCDE outbuf =
***** after 1st conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 45
out_str = ABCDE res = 0
***** before 2nd conversion: EUC-JP -> ISO-8859-1 *****
inbuf = ABCDE outbuf =
***** after 2nd conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 0
out_str = ABCDE res = 0
***** Start conversion: ISO-8859-1 -> UCS-4LE -> ISO-8859-1 *****
***** before 1st conversion: ISO-8859-1 -> UCS-4LE *****
inbuf = ABCDE outbuf =
***** after 1st conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 30
out_str = A res = 0
***** before 2nd conversion: UCS-4LE -> ISO-8859-1 *****
inbuf = A outbuf =
***** after 2nd conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 0
out_str = ABCDE res = 0
***** Start conversion: ISO-8859-1 -> UCS-4BE -> ISO-8859-1 *****
***** before 1st conversion: ISO-8859-1 -> UCS-4BE *****
inbuf = ABCDE outbuf =
***** after 1st conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 30
out_str = res = 0
***** before 2nd conversion: UCS-4BE -> ISO-8859-1 *****
inbuf = outbuf =
***** after 2nd conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 0
out_str = ABCDE res = 0
***** Start conversion: ISO-8859-1 -> UCS-4 -> ISO-8859-1 *****
***** before 1st conversion: ISO-8859-1 -> UCS-4 *****
inbuf = ABCDE outbuf =
***** after 1st conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 30
out_str = res = 0
***** before 2nd conversion: UCS-4 -> ISO-8859-1 *****
inbuf = outbuf =
***** after 2nd conversion *****
inbuf = inbytesleft = 0
outbuf = outbytesleft = 0
out_str = ABCDE res = 0
--- src/support/unicode.C.orig Fri May 11 01:00:08 2007
+++ src/support/unicode.C Fri May 11 01:00:08 2007
@@ -126,10 +126,11 @@
}
char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
+ char * outbuf2 = outbuf;
size_t inbytesleft = buflen;
size_t outbytesleft = maxoutsize;
- int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf2, &outbytesleft);
//lyxerr << std::dec;
//lyxerr << "Inbytesleft: " << inbytesleft << endl;