Hello folks, I found out that the code of convert() in EncodingConversionExample.c wasn’t so clear after all. Fixing that with the code below gives, for ISO-8859-1:
07:09:48 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash --> Compiling EncodingConversionExample.c... 16 -rwxr-xr-x. 1 menu menu 13243 Dec 1 07:10 EncodingConversionExample --> Running EncodingConversionExample: --> encoding = ISO-8859-1 --> content = élàö --> size = 8 --> out_size = 15 --> temp = 7 --> ret = 13 --> temp = 7 --> out_size = 13 conversion was successful. converted: 7 octets. --> out = élà ö <?xml version="1.0" encoding="ISO-8859-1"?> <root>élàö</root> With ISO-8859-7, though: 07:10:05 (722) menu@ - ~/examples_libxml2 > ./DoEncodingConversionExample.bash --> Compiling EncodingConversionExample.c... 16 -rwxr-xr-x. 1 menu menu 13243 Dec 1 07:10 EncodingConversionExample --> Running EncodingConversionExample: --> encoding = ISO-8859-7 --> content = élàö --> size = 8 --> out_size = 15 --> temp = 7 ./DoEncodingConversionExample.bash: line 28: 14446 Segmentation fault (core dumped) ./${EXECUTABLE} "élàö" //----------------------------- #include <string.h> #include <libxml/parser.h> unsigned char* convert (unsigned char *in, char *encoding) { unsigned char *out; int ret,size,out_size,temp; xmlCharEncodingHandlerPtr handler; size = (int)strlen(in)+1; out_size = size*2-1; out = malloc((size_t)out_size); printf("--> size = %d\n", size); printf("--> out_size = %d\n", out_size); if (out) { handler = xmlFindCharEncodingHandler(encoding); if (!handler) { printf("--> %s encoding handler not found\n", encoding); free(out); out = NULL; } } if (out) { temp=size-1; printf("--> temp = %d\n", temp); /** * xmlCharEncodingInputFunc: * @out: a pointer to an array of bytes to store the UTF-8 result * @outlen: the length of @out * @in: a pointer to an array of chars in the original encoding * @inlen: the length of @in * * Take a block of chars in the original encoding and try to convert * it to an UTF-8 block of chars out. * * Returns the number of bytes written, -1 if lack of space, or -2 * if the transcoding failed. 
* The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. * The value of @outlen after return is the number of octets produced. */ ret = handler->input(out, &out_size, in, &temp); if (ret == -1) { printf("Conversion couldn't be done, lack of space.\n"); } else { if (ret == -2) { printf("Conversion couldn't be done, transcoding failed.\n"); } else { if (ret == 0) { printf("--> temp = %d\n", temp); printf("--> temp = %d\n", temp); printf("--> out_size = %d\n", out_size); printf("conversion wasn't successful. converted: %i octets.\n",temp); free(out); out = NULL; } else { // ret > 0 printf("--> ret = %d\n", ret); printf("--> temp = %d\n", temp); printf("--> out_size = %d\n", out_size); printf("conversion was successful. converted: %i octets.\n",temp); out = realloc(out,out_size+1); out[out_size]=0; /*null terminating out*/ }}} } else { printf("no mem\n"); } return (out); } int main(int argc, char **argv) { unsigned char *content, *out; xmlDocPtr doc; xmlNodePtr rootnode; char *encoding = "ISO-8859-7"; // JMI, was -1 if (argc <= 1) { printf("Usage: %s content\n", argv[0]); return(0); } printf("--> encoding = %s\n", encoding); content = argv[1]; printf("--> content = %s\n", content); out = convert(content, encoding); printf("--> out = %s\n", out); doc = xmlNewDoc ("1.0"); rootnode = xmlNewDocNode(doc, NULL, (const xmlChar*)"root", out); xmlDocSetRootElement(doc, rootnode); xmlSaveFormatFileEnc("-", doc, encoding, 1); return (1); } //----------------------------- > Le 30 nov. 2015 à 23:31, Menu Jacques <imj-...@bluewin.ch> a écrit : > > Hello, > > Same problem on CentOS 7.1. 
> > My environment contains: > > 23:25:30 (690) menu@ - ~ > echo $LC_TYPE > iso_8859_1 > 23:25:48 (691) menu@ - ~ > echo $LANG > C > 23:26:00 (692) menu@ - ~ > echo GDM_LANG > GDM_LANG > > and I get: > > 23:27:02 (695) menu@ - ~/examples_libxml2 > ./EncodingConversionExample foo > --> content = foo > --> size = 4 > --> out_size = 7 > --> temp = 3 > --> temp-size+1 = 0 > conversion wasn't successful. > --> out = (null) > <?xml version="1.0" encoding="ISO-8859-1"?> > <root/> > 23:27:20 (696) menu@ - ~/examples_libxml2 > ./EncodingConversionExample éöîà > --> content = éöîà > --> size = 9 > --> out_size = 17 > --> temp = 8 > --> temp-size+1 = 0 > conversion wasn't successful. > --> out = (null) > <?xml version="1.0" encoding="ISO-8859-1"?> > <root/> > > I’m clearly doing something wrong, but what? > > JM > >> Le 25 nov. 2015 à 13:08, Menu Jacques <imj-...@bluewin.ch> a écrit : >> >> I use: >> >> 13:07:34 (254) menu@ - ~/libxml2-git > apt list libxml2 libxml2-dev >> Listing... Done >> libxml2/stable,now 2.9.1+dfsg1-5 i386 [installed,automatic] >> libxml2-dev/stable,now 2.9.1+dfsg1-5 i386 [installed] >> >> on Debian 8 32bit (jessie) >> >> JM >> >>> Le 25 nov. 2015 à 11:17, Menu Jacques <imj-...@bluewin.ch> a écrit : >>> >>> Hello folks, >>> >>> I’ve successfully built the examples from >>> http://xmlsoft.org/tutorial/index.html, except the last one about encoding >>> conversion. >>> >>> I added printouts to the original code to help (see below) and get: >>> >>> 10:51:35 (250) menu@ - ~/libxml2-git > ./EncodingConversionExample foo >>> --> content = foo >>> --> size = 4 >>> --> out_size = 7 >>> --> temp = 3 >>> --> temp-size+1 = 0 >>> conversion wasn't successful. >>> --> out = (null) >>> <?xml version="1.0" encoding="ISO-8859-1"?> >>> <root/> >>> >>> Thanks for your help! 
>>> >>> JM >>> >>> — >>> >>> 11:15:24 (251) menu@ - ~/libxml2-git > cat EncodingConversionExample.c >>> #include <string.h> >>> #include <libxml/parser.h> >>> >>> >>> unsigned char* >>> convert (unsigned char *in, char *encoding) >>> { >>> unsigned char *out; >>> int ret,size,out_size,temp; >>> xmlCharEncodingHandlerPtr handler; >>> >>> size = (int)strlen(in)+1; >>> out_size = size*2-1; >>> out = malloc((size_t)out_size); >>> >>> printf("--> size = %d\n", size); >>> printf("--> out_size = %d\n", out_size); >>> >>> if (out) { >>> handler = xmlFindCharEncodingHandler(encoding); >>> >>> if (!handler) { >>> printf("--> %s encoding handler not found\n", >>> encoding); >>> free(out); >>> out = NULL; >>> } >>> } >>> >>> if (out) { >>> temp=size-1; >>> printf("--> temp = %d\n", temp); >>> >>> ret = handler->input(out, &out_size, in, &temp); >>> >>> if (ret || temp-size+1) { >>> printf("--> temp-size+1 = %d\n", temp-size+1); >>> >>> if (ret) { >>> printf("conversion wasn't successful.\n"); >>> } else { >>> printf("conversion wasn't successful. 
>>> converted: %i octets.\n",temp); >>> } >>> >>> free(out); >>> out = NULL; >>> } else { >>> out = realloc(out,out_size+1); >>> out[out_size]=0; /*null terminating out*/ >>> >>> } >>> } else { >>> printf("no mem\n"); >>> } >>> return (out); >>> } >>> >>> >>> int >>> main(int argc, char **argv) { >>> >>> unsigned char *content, *out; >>> xmlDocPtr doc; >>> xmlNodePtr rootnode; >>> char *encoding = "ISO-8859-1"; >>> >>> >>> if (argc <= 1) { >>> printf("Usage: %s content\n", argv[0]); >>> return(0); >>> } >>> >>> content = argv[1]; >>> printf("--> content = %s\n", content); >>> >>> out = convert(content, encoding); >>> printf("--> out = %s\n", out); >>> >>> doc = xmlNewDoc ("1.0"); >>> rootnode = xmlNewDocNode(doc, NULL, (const xmlChar*)"root", out); >>> xmlDocSetRootElement(doc, rootnode); >>> >>> xmlSaveFormatFileEnc("-", doc, encoding, 1); >>> return (1); >>> } >>> >> > _______________________________________________ xml mailing list, project page http://xmlsoft.org/ xml@gnome.org https://mail.gnome.org/mailman/listinfo/xml