Am Dienstag, 5. September 2006 14:35 schrieb Abdelrazak Younes: > Peter Kümmel wrote: > > In my ordinary text editor (ultraedit) it doesn't look like a text file. > > Same for me in wordpad... attached the file.
Can you please try this updated test program? The screen output should look like the one in stream.log. I also attached the generated file. In the old version I forgot a const for two virtual methods, therefore the versions of the base class were called. I believe that this version should work on any OS. Georg
e4 f6 fc c4 d6 dc from: 0x8053470 inbytesleft: 88 outbytesleft: 132 20 61 62 63 64 a 20 e4 f6 fc a 20 c4 d6 dc a 20 61 62 63 64 a 20 0 0 0 61 0 0 0 62 0 0 0 63 0 0 0 64 0 0 0 a 0 0 0 20 0 0 0 e4 0 0 0 f6 0 0 0 fc 0 0 0 a 0 0 0 20 0 0 0 c4 0 0 0 d6 0 0 0 dc 0 0 0 a 0 0 0 20 0 0 0 61 0 0 0 62 0 0 0 63 0 0 0 64 0 0 0 a 0 0 0 inbytesleft: 0 outbytesleft: 104 20 61 62 63 64 a 20 ffffffc3 ffffffa4 ffffffc3 ffffffb6 ffffffc3 ffffffbc a 20 ffffffc3 ffffff84 ffffffc3 ffffff96 ffffffc3 ffffff9c a 20 61 62 63 64 a
abcd äöü ÄÖÜ abcd
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <cwchar>
#include <exception>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>
#include <typeinfo>

#include <iconv.h>

// Expands to nothing: iconv() is assumed to take a non-const input pointer.
#define ICONV_CONST

namespace boost {
typedef unsigned int uint32_t;
}

namespace std {
#ifdef __GNUC__
// We get undefined references to these virtual methods. This looks like
// a bug in gcc. The implementation here does not do anything useful, since
// it is overridden in utf8_codecvt_facet and ascii_ctype_facet.
template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_out(mbstate_t &,
		const boost::uint32_t *, const boost::uint32_t *,
		const boost::uint32_t *&, char *, char *, char *&) const
{
	return error;
}

template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_unshift(mbstate_t &,
		char *, char *, char *&) const
{
	return error;
}

template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_in(mbstate_t &,
		const char *, const char *, const char *&,
		boost::uint32_t *, boost::uint32_t *, boost::uint32_t *&) const
{
	return error;
}

template<> int
codecvt<boost::uint32_t, char, mbstate_t>::do_encoding() const throw()
{
	return 0;
}

template<> bool
codecvt<boost::uint32_t, char, mbstate_t>::do_always_noconv() const throw()
{
	return true;
}

template<> int
codecvt<boost::uint32_t, char, mbstate_t>::do_length(mbstate_t &,
		const char *, const char *, size_t) const
{
	return 1;
}

template<> int
codecvt<boost::uint32_t, char, mbstate_t>::do_max_length() const throw()
{
	return 4;
}

template<> ctype<boost::uint32_t>::~ctype() {}

template<> bool
ctype<boost::uint32_t>::do_is(ctype<boost::uint32_t>::mask, boost::uint32_t) const
{
	return false;
}

template<> boost::uint32_t const *
ctype<boost::uint32_t>::do_is(const boost::uint32_t *, const boost::uint32_t *,
		ctype<boost::uint32_t>::mask *) const
{
	return 0;
}

template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_scan_is(ctype<boost::uint32_t>::mask,
		const boost::uint32_t *, const boost::uint32_t *) const
{
	return 0;
}

template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_scan_not(ctype<boost::uint32_t>::mask,
		const boost::uint32_t *, const boost::uint32_t *) const
{
	return 0;
}

template<> boost::uint32_t
ctype<boost::uint32_t>::do_toupper(boost::uint32_t) const
{
	return 0;
}

template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_toupper(boost::uint32_t *, boost::uint32_t const *) const
{
	return 0;
}

template<> boost::uint32_t
ctype<boost::uint32_t>::do_tolower(boost::uint32_t) const
{
	return 0;
}

template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_tolower(boost::uint32_t *, boost::uint32_t const *) const
{
	return 0;
}

template<> boost::uint32_t
ctype<boost::uint32_t>::do_widen(char) const
{
	return 0;
}

template<> const char *
ctype<boost::uint32_t>::do_widen(const char *, const char *, boost::uint32_t *) const
{
	return 0;
}

template<> char
ctype<boost::uint32_t>::do_narrow(const boost::uint32_t, char) const
{
	return 0;
}

template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_narrow(const boost::uint32_t *, const boost::uint32_t *,
		char, char *) const
{
	return 0;
}
#endif
}

namespace lyx {
typedef boost::uint32_t char_type;
}

namespace {

// iconv name of the encoding used for the lyx::char_type side of BOTH
// conversion directions. do_out() and do_in() must agree on it, otherwise
// text written by this facet is read back with a different byte order.
// NOTE(review): "UCS-4LE" matches the in-memory layout only on little-endian
// hosts; a big-endian build would need "UCS-4BE" here -- confirm.
char const * const ucs4_codeset = "UCS-4LE";

}

/// codecvt facet for conversion of lyx::char_type (internal representation)
/// to UTF8 (external representation) and back, implemented with iconv.
///
/// The two iconv descriptors are opened lazily on the first conversion in
/// the respective direction and closed in the destructor.
class utf8_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t>
{
	typedef std::codecvt<lyx::char_type, char, std::mbstate_t> base;
public:
	explicit utf8_codecvt_facet(size_t refs = 0);
protected:
	virtual ~utf8_codecvt_facet();
	virtual result do_out(state_type &, intern_type const * from,
			intern_type const * from_end, intern_type const *& from_next,
			extern_type * to, extern_type * to_end,
			extern_type *& to_next) const;
	virtual result do_unshift(state_type &, extern_type * to,
			extern_type *, extern_type *& to_next) const;
	virtual result do_in(state_type &, extern_type const * from,
			extern_type const * from_end, extern_type const *& from_next,
			intern_type * to, intern_type * to_end,
			intern_type *& to_next) const;
	virtual int do_encoding() const throw();
	virtual bool do_always_noconv() const throw();
	virtual int do_length(state_type & state, extern_type const * from,
			extern_type const * end, size_t max) const;
	virtual int do_max_length() const throw();
private:
	/// Run one iconv() call on descriptor \p cd and translate its outcome
	/// into a codecvt result. Traces the input and output buffers to stderr.
	///
	/// \p from / \p inbytesleft and \p to / \p outbytesleft are advanced
	/// by iconv() itself.
	/// \return partial on EINVAL or E2BIG, error on any other iconv
	///         failure, noconv if no output byte was produced, ok otherwise.
	inline base::result do_iconv(iconv_t cd, char const ** from,
			size_t * inbytesleft, char ** to, size_t * outbytesleft) const
	{
		// size_t is printed through unsigned long: "%d" does not match
		// size_t and misreads the arguments where the two differ in width.
		fprintf(stderr, "from: %p inbytesleft: %lu outbytesleft: %lu\n",
			static_cast<void const *>(*from),
			static_cast<unsigned long>(*inbytesleft),
			static_cast<unsigned long>(*outbytesleft));

		// Dump the input once as intern_type units ...
		intern_type const * const units =
			reinterpret_cast<intern_type const *>(*from);
		for (size_t i = 0; i < *inbytesleft / sizeof(intern_type); ++i) {
			unsigned int c = units[i];
			fprintf(stderr, "%x ", c);
		}
		fprintf(stderr, "\n");

		// ... and once as raw bytes.
		unsigned char const * const bytes =
			reinterpret_cast<unsigned char const *>(*from);
		for (size_t i = 0; i < *inbytesleft; ++i) {
			unsigned int c = bytes[i];
			fprintf(stderr, "%x ", c);
		}
		fprintf(stderr, "\n");

		char const * to_start = *to;
		size_t converted = iconv(cd, const_cast<char ICONV_CONST **>(from),
				inbytesleft, to, outbytesleft);
		if (converted == (size_t)(-1)) {
			// Capture errno right away: the stdio calls below are
			// allowed to modify it before we dispatch on it.
			int const err = errno;
			fprintf(stderr, "Error %d returned from iconv: %s\n",
				err, strerror(err));
			switch (err) {
			case EINVAL:
			case E2BIG:
				fprintf(stderr, "partial result. inbytesleft: %lu outbytesleft: %lu\n",
					static_cast<unsigned long>(*inbytesleft),
					static_cast<unsigned long>(*outbytesleft));
				fflush(stderr);
				return base::partial;
			case EILSEQ:
			default:
				fprintf(stderr, "error result. inbytesleft: %lu outbytesleft: %lu\n",
					static_cast<unsigned long>(*inbytesleft),
					static_cast<unsigned long>(*outbytesleft));
				fflush(stderr);
				return base::error;
			}
		}
		fprintf(stderr, " inbytesleft: %lu outbytesleft: %lu\n",
			static_cast<unsigned long>(*inbytesleft),
			static_cast<unsigned long>(*outbytesleft));
		fflush(stderr);

		// Dump what iconv wrote (char is printed unconverted, so bytes
		// >= 0x80 may show sign-extended, as in the reference log).
		for (size_t i = 0; i < size_t(*to - to_start); ++i) {
			unsigned int c = to_start[i];
			fprintf(stderr, "%x ", c);
		}
		fprintf(stderr, "\n");

		if (*to == to_start)
			return base::noconv;
		return base::ok;
	}

	/// Descriptor for UTF-8 -> UCS-4, (iconv_t)(-1) until first do_in().
	mutable iconv_t in_cd_;
	/// Descriptor for UCS-4 -> UTF-8, (iconv_t)(-1) until first do_out().
	mutable iconv_t out_cd_;
};


utf8_codecvt_facet::utf8_codecvt_facet(size_t refs)
	: base(refs), in_cd_((iconv_t)(-1)), out_cd_((iconv_t)(-1))
{
}


/// Closes whichever descriptors were opened; failures are only reported.
utf8_codecvt_facet::~utf8_codecvt_facet()
{
	if (in_cd_ != (iconv_t)(-1)) {
		if (iconv_close(in_cd_) == -1) {
			fprintf(stderr, "Error %d returned from iconv_close(in_cd_): %s\n",
				errno, strerror(errno));
			fflush(stderr);
		}
	}
	if (out_cd_ != (iconv_t)(-1)) {
		if (iconv_close(out_cd_) == -1) {
			fprintf(stderr, "Error %d returned from iconv_close(out_cd_): %s\n",
				errno, strerror(errno));
			fflush(stderr);
		}
	}
}


/// Convert [from, from_end) from UCS-4 to UTF-8 into [to, to_end).
/// Throws std::exception if the iconv descriptor cannot be opened.
utf8_codecvt_facet::result
utf8_codecvt_facet::do_out(state_type &, intern_type const * from,
		intern_type const * from_end, intern_type const *& from_next,
		extern_type * to, extern_type * to_end,
		extern_type *& to_next) const
{
	if (out_cd_ == (iconv_t)(-1)) {
		out_cd_ = iconv_open("UTF-8", ucs4_codeset);
		if (out_cd_ == (iconv_t)(-1)) {
			fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n",
				errno, strerror(errno));
			fflush(stderr);
			throw std::exception();
		}
	}
	size_t inbytesleft = (from_end - from) * sizeof(intern_type);
	size_t outbytesleft = (to_end - to) * sizeof(extern_type);
	from_next = from;
	to_next = to;
	// iconv works on byte pointers; it advances from_next/to_next in place.
	return do_iconv(out_cd_, reinterpret_cast<char const **>(&from_next),
			&inbytesleft, &to_next, &outbytesleft);
}


utf8_codecvt_facet::result
utf8_codecvt_facet::do_unshift(state_type &, extern_type * to,
		extern_type *, extern_type *& to_next) const
{
	// utf8 does not use shifting
	to_next = to;
	return base::noconv;
}


/// Convert [from, from_end) from UTF-8 to UCS-4 into [to, to_end).
/// Throws std::exception if the iconv descriptor cannot be opened.
utf8_codecvt_facet::result
utf8_codecvt_facet::do_in(state_type &, extern_type const * from,
		extern_type const * from_end, extern_type const *& from_next,
		intern_type * to, intern_type * to_end,
		intern_type *& to_next) const
{
	if (in_cd_ == (iconv_t)(-1)) {
		// Must be the same UCS-4 flavour that do_out() reads; a bare
		// "UCS-4" leaves the byte order to the iconv implementation.
		in_cd_ = iconv_open(ucs4_codeset, "UTF-8");
		if (in_cd_ == (iconv_t)(-1)) {
			fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n",
				errno, strerror(errno));
			fflush(stderr);
			throw std::exception();
		}
	}
	size_t inbytesleft = (from_end - from) * sizeof(extern_type);
	size_t outbytesleft = (to_end - to) * sizeof(intern_type);
	from_next = from;
	to_next = to;
	return do_iconv(in_cd_, &from_next, &inbytesleft,
			reinterpret_cast<char **>(&to_next), &outbytesleft);
}


int utf8_codecvt_facet::do_encoding() const throw()
{
	return 0;
}


bool utf8_codecvt_facet::do_always_noconv() const throw()
{
	return false;
}


/// Returns the smaller of the available byte count and \p max; no actual
/// conversion is performed (the converting variant is disabled below).
int utf8_codecvt_facet::do_length(state_type & /*state*/, extern_type const * from,
		extern_type const * end, size_t max) const
{
#if 0
	intern_type * to = new intern_type[max];
	intern_type * to_end = to + max;
	intern_type * to_next = to;
	extern_type const * from_next = from;
	do_in(state, from, end, from_next, to, to_end, to_next);
	delete[] to;
	return to_next - to;
#endif
	size_t const length = end - from;
	return static_cast<int>(std::min(length, max));
}


int utf8_codecvt_facet::do_max_length() const throw()
{
	// UTF8 uses at most 6 bytes to represent one code point
	return 6;
}


// ctype facet for UCS4 streams. Widening and narrowing is restricted to
// ASCII, since we do not need anything else.
class ascii_ctype_facet : public std::ctype<lyx::char_type> { public: typedef lyx::char_type char_type; public: explicit ascii_ctype_facet(size_t refs = 0) : std::ctype<char_type>(refs) {} protected: virtual ~ascii_ctype_facet() {} virtual char_type do_widen(char c) const { if (static_cast<unsigned char>(c) < 128) return c; throw std::bad_cast(); } virtual const char* do_widen(const char* lo, const char* hi, char_type* dest) const { while (lo < hi) { if (static_cast<unsigned char>(*lo) >= 128) throw std::bad_cast(); *dest = *lo; ++lo; ++dest; } return hi; } virtual char do_narrow(char_type wc, char dfault) const { if (wc < 128) return wc; if (wctob(wc) == EOF) return dfault; throw std::bad_cast(); } virtual const char_type * do_narrow(const char_type * lo, const char_type * hi, char dfault, char * dest) const { while (lo < hi) { if (*lo < 128) *dest = *lo; else { if (wctob(*lo) == EOF) *dest = dfault; else throw std::bad_cast(); } ++lo; ++dest; } return hi; } }; std::basic_ostream<lyx::char_type> & operator<<(std::basic_ostream<lyx::char_type> & os, char c) { return os.put(lyx::char_type(c)); } int main() { std::locale const utf8_1(std::locale("C"), new utf8_codecvt_facet); std::locale const utf8(utf8_1, new ascii_ctype_facet); // std::cerr << "utf8 has std::codecvt<lyx::char_type, char, std::mbstate_t> facet: " << std::has_facet<std::codecvt<lyx::char_type, char, std::mbstate_t> >(utf8) << std::endl; // std::cerr << "utf8 has utf8_codecvt_facet facet: " << std::has_facet<utf8_codecvt_facet>(utf8) << std::endl; // std::cerr << "utf8 has std::ctype<lyx::char_type> facet: " << std::has_facet<std::ctype<lyx::char_type> >(utf8) << std::endl; std::basic_ofstream<lyx::char_type> os; os.imbue(utf8); os.open("stream.out"); os << " abc"; os.put(lyx::char_type('d')); os << '\n'; lyx::char_type ae = 0xe4; lyx::char_type oe = 0xf6; lyx::char_type ue = 0xfc; lyx::char_type Ae = 0xc4; lyx::char_type Oe = 0xd6; lyx::char_type Ue = 0xdc; std::basic_string<lyx::char_type> s; s += 
Ae; s += Oe; s += Ue; os << ' '; os.put(ae); os.put(oe); os.put(ue); os << '\n'; std::cerr << std::hex << ' ' << ae << ' ' << oe << ' ' << ue << '\n'; os << ' ' << s << '\n'; std::cerr << std::hex << ' ' << Ae << ' ' << Oe << ' ' << Ue << '\n'; os << " abcd\n"; return 0; }