Changeset: 104cf6f8b939 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/104cf6f8b939 Added Files: clients/mapiclient/iconv-stream.h Removed Files: common/stream/iconv_stream.c Modified Files: clients/Tests/exports.stable.out clients/examples/C/streamcat.c clients/mapiclient/CMakeLists.txt clients/mapiclient/mclient.c common/stream/CMakeLists.txt common/stream/monetdb-stream.pc.in common/stream/stdio_stream.c common/stream/stream.h common/stream/stream_internal.h testing/CMakeLists.txt testing/Mtest.py.in Branch: ascii-flag Log Message:
Move iconv support out of stream library so that mserver doesn't carry it around. diffs (truncated from 1051 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -1680,8 +1680,6 @@ FILE *getFile(stream *s); int getFileNo(stream *s); size_t getFileSize(stream *s); stream *gz_stream(stream *inner, int preset); -stream *iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name); -stream *iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name); bool isa_block_stream(const stream *s); stream *lz4_stream(stream *inner, int preset); stream *mapi_request_download(const char *filename, bool binary, bstream *rs, stream *ws); diff --git a/clients/examples/C/streamcat.c b/clients/examples/C/streamcat.c --- a/clients/examples/C/streamcat.c +++ b/clients/examples/C/streamcat.c @@ -473,6 +473,40 @@ opener_wastream(char *filename) } +#ifdef HAVE_ICONV +#include "iconv-stream.h" +#else +static stream * +iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name) +{ + if (ss == NULL || charset == NULL || name == NULL) + return NULL; + if (ss->isutf8 || + strcmp(charset, "utf-8") == 0 || + strcmp(charset, "UTF-8") == 0 || + strcmp(charset, "UTF8") == 0) + return ss; + + fprintf(stderr, "ICONV support has been left out of this MonetDB"); + return NULL; +} + +static stream * +iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name) +{ + if (ss == NULL || charset == NULL || name == NULL) + return NULL; + if (ss->isutf8 || + strcmp(charset, "utf-8") == 0 || + strcmp(charset, "UTF-8") == 0 || + strcmp(charset, "UTF8") == 0) + return ss; + + fprintf(stderr, "ICONV support has been left out of this MonetDB"); + return NULL; +} +#endif + static stream * wrapper_read_iconv(stream *s, char *enc) { diff --git a/clients/mapiclient/CMakeLists.txt b/clients/mapiclient/CMakeLists.txt --- a/clients/mapiclient/CMakeLists.txt +++ b/clients/mapiclient/CMakeLists.txt @@ -39,10 +39,12 @@ target_sources(mclient ReadlineTools.h mhelp.c mhelp.h + iconv-stream.h curl-stream.h) target_include_directories(mclient PRIVATE + $<$<BOOL:${Iconv_IS_BUILT_IN}>:${CMAKE_REQUIRED_INCLUDES}> $<$<PLATFORM_ID:Windows>:${HAVE_GETOPT_H}>) target_link_libraries(mclient diff --git a/clients/mapiclient/iconv-stream.h b/clients/mapiclient/iconv-stream.h new file mode 100644 --- /dev/null +++ b/clients/mapiclient/iconv-stream.h @@ -0,0 +1,256 @@ +#include <iconv.h> + +struct ic_priv_t { + stream *s; + iconv_t cd; + bool eof; + size_t buflen; + char buffer[BUFSIZ]; +}; + +static ssize_t +ic_read(void *restrict private, void *restrict buf, size_t elmsize, size_t cnt) +{ + struct ic_priv_t *ic = private; + char *inbuf = ic->buffer; + size_t inbytesleft = ic->buflen; + char *outbuf = buf; + size_t outbytesleft = elmsize * cnt; + + if (outbytesleft == 0) + return 0; + while (outbytesleft > 0 && !ic->eof) { + if (ic->buflen == sizeof(ic->buffer)) { + /* ridiculously long multibyte sequence, return error */ + fprintf(stderr, "multibyte sequence too long"); + return -1; + } + + switch (mnstr_read(ic->s, ic->buffer + ic->buflen, 1, 1)) { + case 1: + /* expected: read one byte */ + ic->buflen++; + inbytesleft++; + break; + case 0: + /* end of file */ + ic->eof = true; + if (ic->buflen > 0) { + /* incomplete input */ + fprintf(stderr, "incomplete input"); + return -1; + } + if (iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1) { + /* some error occurred */ + fprintf(stderr, "iconv reported an error"); + return -1; + } + goto exit_func; /* double break */ + default: + /* error */ + if (mnstr_peek_error(ic->s)) + fprintf(stderr, "%s\n", mnstr_peek_error(ic->s)); + mnstr_clearerr(ic->s); + return -1; + } + if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) { + switch (errno) { + case EILSEQ: + fprintf(stderr, "invalid multibyte sequence"); + return -1; + case EINVAL: + /* incomplete multibyte sequence encountered */ + break; + case E2BIG: + /* not enough space in output buffer, + * return what we have, saving what's in + * the buffer */ + goto exit_func; + default: + fprintf(stderr, "iconv reported an error"); + return -1; + } + } + if (inbytesleft == 0) { + /* converted complete buffer */ + inbuf = ic->buffer; + ic->buflen = 0; + } + } + exit_func: + if (inbuf > ic->buffer) + memmove(ic->buffer, inbuf, inbytesleft); + ic->buflen = inbytesleft; + if (outbytesleft == elmsize * cnt && !mnstr_eof(ic->s)) { + /* if we're returning data, we must pass on EOF on the + * next call (i.e. keep ic->eof set), otherwise we + * must clear it so that the next call will cause the + * underlying stream to be read again */ + ic->eof = false; + } + return (ssize_t) ((elmsize * cnt - outbytesleft) / elmsize); +} + +static ssize_t +ic_write(void *restrict private, const void *restrict buf, size_t elmsize, size_t cnt) +{ + struct ic_priv_t *ic = private; + char *inbuf = (char *) buf; /* iconv requires non-const */ + size_t inbytesleft = elmsize * cnt; + char *bf = NULL; + + if (ic == NULL) { + fprintf(stderr, "stream already ended"); + goto bailout; + } + + /* if unconverted data from a previous call remains, add it to + * the start of the new data, using temporary space */ + if (ic->buflen > 0) { + bf = malloc(ic->buflen + inbytesleft); + if (bf == NULL) { + /* cannot allocate memory */ + fprintf(stderr, "out of memory"); + goto bailout; + } + memcpy(bf, ic->buffer, ic->buflen); + memcpy(bf + ic->buflen, buf, inbytesleft); + buf = bf; + inbytesleft += ic->buflen; + ic->buflen = 0; + } + while (inbytesleft > 0) { + char *outbuf = ic->buffer; + size_t outbytesleft = sizeof(ic->buffer); + + if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) { + switch (errno) { + case EILSEQ: + /* invalid multibyte sequence encountered */ + fprintf(stderr, "invalid multibyte sequence"); + goto bailout; + case EINVAL: + /* incomplete multibyte sequence + * encountered flush what has been + * converted */ + if (outbytesleft < sizeof(ic->buffer) && + mnstr_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) { + fprintf(stderr, "incomplete multibyte sequence"); + goto bailout; + } + /* remember what hasn't been converted */ + if (inbytesleft > sizeof(ic->buffer)) { + /* ridiculously long multibyte + * sequence, so return + * error */ + fprintf(stderr, "multibyte sequence too long"); + goto bailout; + } + memcpy(ic->buffer, inbuf, inbytesleft); + ic->buflen = inbytesleft; + if (bf) + free(bf); + return (ssize_t) cnt; + case E2BIG: + /* not enough space in output buffer */ + break; + default: + fprintf(stderr, "iconv reported an error"); + goto bailout; + } + } + if (mnstr_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) { + const char *e = mnstr_peek_error(ic->s); + if (e) + fprintf(stderr, "%s\n", e); + mnstr_clearerr(ic->s); + goto bailout; + } + } + if (bf) + free(bf); + return (ssize_t) cnt; + + bailout: + if (bf) + free(bf); + return -1; +} + +static void +ic_close(void *private) +{ + struct ic_priv_t *ic = private; + if (ic->cd != (iconv_t) -1) + iconv_close(ic->cd); + ic->cd = (iconv_t) -1; +} + +static void +ic_destroy(void *private) +{ + ic_close(private); + free(private); +} + +static stream * +iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name) +{ + if (ss == NULL || charset == NULL || name == NULL) + return NULL; + + struct ic_priv_t *priv = malloc(sizeof(struct ic_priv_t)); + if (priv == NULL) { + fprintf(stderr, "Cannot allocate memory\n"); + return NULL; + } + *priv = (struct ic_priv_t) { + .s = ss, + .cd = iconv_open("utf-8", charset), + }; + if (priv->cd == (iconv_t) -1) { + free(priv); + fprintf(stderr, "Cannot initiate character set translation from %s\n", + charset); + return NULL; + } + stream *s = callback_stream(priv, ic_read, NULL, ic_close, ic_destroy, name); + if (s == NULL) { + fprintf(stderr, "Cannot allocate memory\n"); + iconv_close(priv->cd); + free(priv); + return NULL; + } _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org