Am Dienstag, 5. September 2006 14:35 schrieb Abdelrazak Younes:
> Peter Kümmel wrote:
> > In my ordinary text editor (UltraEdit) it doesn't look like a text
> > file.
> 
> Same for me in wordpad... attached the file.

Can you please try this updated test program? The screen output should look 
like in stream.log. I also attached the generated file. In the old version 
I forgot a const on two virtual methods, so the base class versions were 
called instead.
I believe that this version should work on any OS.


Georg
 e4 f6 fc
 c4 d6 dc
from: 0x8053470 inbytesleft: 88 outbytesleft: 132
20 61 62 63 64 a 20 e4 f6 fc a 20 c4 d6 dc a 20 61 62 63 64 a 
20 0 0 0 61 0 0 0 62 0 0 0 63 0 0 0 64 0 0 0 a 0 0 0 20 0 0 0 e4 0 0 0 f6 0 0 0 fc 0 0 0 a 0 0 0 20 0 0 0 c4 0 0 0 d6 0 0 0 dc 0 0 0 a 0 0 0 20 0 0 0 61 0 0 0 62 0 0 0 63 0 0 0 64 0 0 0 a 0 0 0 
 inbytesleft: 0 outbytesleft: 104
20 61 62 63 64 a 20 ffffffc3 ffffffa4 ffffffc3 ffffffb6 ffffffc3 ffffffbc a 20 ffffffc3 ffffff84 ffffffc3 ffffff96 ffffffc3 ffffff9c a 20 61 62 63 64 a 
 abcd
 äöü
 ÄÖÜ
 abcd
#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <cwchar>
#include <exception>
#include <fstream>
#include <iostream>
#include <locale>
#include <string>
#include <typeinfo>

#include <iconv.h>

// Qualifier spliced into the const_cast applied to iconv()'s input-buffer
// argument in utf8_codecvt_facet::do_iconv. It is defined empty here, so that
// cast produces a plain `char **`.
#define ICONV_CONST

// Local declaration of boost::uint32_t as unsigned int.
// NOTE(review): presumably a stand-in so this test program does not need the
// real Boost headers - confirm unsigned int is 32 bits wide on every target.
namespace boost {
	typedef unsigned uint32_t;
}


namespace std {
#ifdef __GNUC__
// Explicit specializations of the virtual members of
// codecvt<boost::uint32_t, char, mbstate_t> and ctype<boost::uint32_t>.
// Without them we get undefined references to these virtual methods when
// building with gcc, which looks like a gcc bug. The bodies are placeholders
// that do nothing useful: the codecvt members are all overridden in
// utf8_codecvt_facet, and ascii_ctype_facet overrides do_widen/do_narrow.
// The remaining ctype members (do_is, do_scan_*, do_toupper, do_tolower)
// are NOT overridden anywhere in this file, so these stubs are what runs
// if they are ever called.

// codecvt stubs: the conversion functions report `error`.
template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_out(mbstate_t &, const boost::uint32_t *, const boost::uint32_t *, const boost::uint32_t *&,
		char *, char *, char *&) const { return error; }
template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_unshift(mbstate_t &, char *, char *, char *&) const { return error; }
template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_in(mbstate_t &, const char *, const char *, const char *&,
		boost::uint32_t*, boost::uint32_t*, boost::uint32_t*&) const { return error; }
template<> int codecvt<boost::uint32_t, char, mbstate_t>::do_encoding() const throw() { return 0; }
template<> bool codecvt<boost::uint32_t, char, mbstate_t>::do_always_noconv() const throw() { return true; }
template<> int codecvt<boost::uint32_t, char, mbstate_t>::do_length(mbstate_t &, const char *, const char *, size_t) const { return 1; }
template<> int codecvt<boost::uint32_t, char, mbstate_t>::do_max_length() const throw() { return 4; }

// ctype stubs: every member returns false / 0 / a null pointer.
template<> ctype<boost::uint32_t>::~ctype() {}
template<> bool
ctype<boost::uint32_t>::do_is(ctype<boost::uint32_t>::mask, boost::uint32_t) const { return false; }
template<> boost::uint32_t const *
ctype<boost::uint32_t>::do_is(const boost::uint32_t *, const boost::uint32_t *, ctype<boost::uint32_t>::mask *) const { return 0; }
template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_scan_is(ctype<boost::uint32_t>::mask, const boost::uint32_t *, const boost::uint32_t *) const { return 0; }
template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_scan_not(ctype<boost::uint32_t>::mask, const boost::uint32_t *, const boost::uint32_t *) const { return 0; }
template<> boost::uint32_t ctype<boost::uint32_t>::do_toupper(boost::uint32_t) const { return 0; }
template<> const boost::uint32_t * ctype<boost::uint32_t>::do_toupper(boost::uint32_t *, boost::uint32_t const *) const { return 0; }
template<> boost::uint32_t ctype<boost::uint32_t>::do_tolower(boost::uint32_t) const { return 0; }
template<> const boost::uint32_t * ctype<boost::uint32_t>::do_tolower(boost::uint32_t *, boost::uint32_t const *) const { return 0; }
template<> boost::uint32_t ctype<boost::uint32_t>::do_widen(char) const { return 0; }
template<> const char *
ctype<boost::uint32_t>::do_widen(const char *, const char *, boost::uint32_t *) const { return 0; }
template<> char
ctype<boost::uint32_t>::do_narrow(const boost::uint32_t, char) const { return 0; }
template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_narrow(const boost::uint32_t *, const boost::uint32_t *, char, char *) const { return 0; }
#endif
}


// Character type used for the internal side of the streams and facets in
// this file; an alias for boost::uint32_t.
namespace lyx { typedef boost::uint32_t char_type; }


// codecvt_facet for conversion of lyx::char_type (internal representation) to UTF8 (external representation)
class utf8_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t> {
	typedef std::codecvt<lyx::char_type, char, std::mbstate_t> base;
public:
	explicit utf8_codecvt_facet(size_t refs = 0);
protected:
	virtual ~utf8_codecvt_facet();
	virtual result do_out(state_type &, intern_type const * from,
			intern_type const * from_end, intern_type const *& from_next,
			extern_type * to, extern_type * to_end,
			extern_type *& to_next) const;
	virtual result do_unshift(state_type &, extern_type * to, extern_type *, extern_type *& to_next) const;
	virtual result do_in(state_type &,
			extern_type const * from, extern_type const * from_end,
			extern_type const *& from_next,
			intern_type * to, intern_type * to_end,
			intern_type *& to_next) const;
	virtual int do_encoding() const throw();
	virtual bool do_always_noconv() const throw();
	virtual int do_length(state_type & state, extern_type const * from, extern_type const * end, size_t max) const;
	virtual int do_max_length() const throw();
private:
	inline base::result do_iconv(iconv_t cd, char const ** from, size_t * inbytesleft, char ** to, size_t * outbytesleft) const
	{
		fprintf(stderr, "from: %p inbytesleft: %d outbytesleft: %d\n", *from, *inbytesleft, *outbytesleft);
		for (size_t i = 0; i < *inbytesleft / sizeof(intern_type); ++i) {
			intern_type const * buf = reinterpret_cast<intern_type const *>(*from);
			unsigned int c = buf[i];
			fprintf(stderr, "%x ", c);
		}
		fprintf(stderr, "\n");
		for (size_t i = 0; i < *inbytesleft; ++i) {
			unsigned char const * buf = reinterpret_cast<unsigned char const *>(*from);
			unsigned int c = buf[i];
			fprintf(stderr, "%x ", c);
		}
		fprintf(stderr, "\n");
		char const * to_start = *to;
		size_t converted = iconv(cd, const_cast<char ICONV_CONST **>(from), inbytesleft, to, outbytesleft);
		if (converted == (size_t)(-1)) {
			fprintf(stderr, "Error %d returned from iconv: %s\n", errno, strerror(errno));
			switch(errno) {
				case EINVAL:
				case E2BIG:
					fprintf(stderr, "partial result. inbytesleft: %d outbytesleft: %d\n", *inbytesleft, *outbytesleft);
					fflush(stderr);
					return base::partial;
				case EILSEQ:
				default:
					fprintf(stderr, "error result. inbytesleft: %d outbytesleft: %d\n", *inbytesleft, *outbytesleft);
					fflush(stderr);
					return base::error;
			}
		}
		fprintf(stderr, " inbytesleft: %d outbytesleft: %d\n", *inbytesleft, *outbytesleft); fflush(stderr);
		for (size_t i = 0; i < size_t(*to - to_start); ++i) {
			unsigned int c = to_start[i];
			fprintf(stderr, "%x ", c);
		}
		fprintf(stderr, "\n");
		if (*to == to_start)
			return base::noconv;
		return base::ok;
	}
	mutable iconv_t in_cd_;
	mutable iconv_t out_cd_;
};

// Constructs the facet with both iconv descriptors marked as not yet opened.
// refs is forwarded unchanged to the std::codecvt base class.
utf8_codecvt_facet::utf8_codecvt_facet(size_t refs)
	: base(refs),
	  in_cd_((iconv_t)(-1)),
	  out_cd_((iconv_t)(-1))
{}
// Closes whichever iconv descriptors were actually opened. A failing
// iconv_close() is only reported on stderr.
utf8_codecvt_facet::~utf8_codecvt_facet()
{
	iconv_t const unopened = (iconv_t)(-1);

	if (in_cd_ != unopened && iconv_close(in_cd_) == -1) {
		fprintf(stderr, "Error %d returned from iconv_close(in_cd_): %s\n", errno, strerror(errno));
		fflush(stderr);
	}

	if (out_cd_ != unopened && iconv_close(out_cd_) == -1) {
		fprintf(stderr, "Error %d returned from iconv_close(out_cd_): %s\n", errno, strerror(errno));
		fflush(stderr);
	}
}
// Converts the internal characters in [from, from_end) to UTF-8 bytes written
// to [to, to_end). The "UCS-4LE" -> "UTF-8" iconv descriptor is opened on the
// first call; if that fails the error is reported on stderr and
// std::exception is thrown. from_next / to_next are reset to the buffer
// starts and then advanced by the iconv call inside do_iconv().
utf8_codecvt_facet::result utf8_codecvt_facet::do_out(state_type &, intern_type const * from,
		intern_type const * from_end, intern_type const *& from_next,
		extern_type * to, extern_type * to_end,
		extern_type *& to_next) const
{
	iconv_t const unopened = (iconv_t)(-1);
	if (out_cd_ == unopened) {
		out_cd_ = iconv_open("UTF-8", "UCS-4LE");
		if (out_cd_ == unopened) {
			fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n", errno, strerror(errno));
			fflush(stderr);
			throw std::exception();
		}
	}

	from_next = from;
	to_next = to;

	// iconv works on byte counts, not element counts.
	size_t bytes_in = (from_end - from) * sizeof(intern_type);
	size_t bytes_out = (to_end - to) * sizeof(extern_type);
	char const ** const source = reinterpret_cast<char const **>(&from_next);
	return do_iconv(out_cd_, source, &bytes_in, &to_next, &bytes_out);
}
// UTF-8 has no shift states, so nothing is ever written here: to_next is set
// to the start of the output range and noconv is reported.
utf8_codecvt_facet::result utf8_codecvt_facet::do_unshift(state_type &, extern_type * to, extern_type *, extern_type *& to_next) const
{
	result const nothing_written = base::noconv;
	to_next = to;
	return nothing_written;
}
// Converts the UTF-8 bytes in [from, from_end) to internal characters written
// to [to, to_end). The iconv descriptor is opened on the first call; if that
// fails the error is reported on stderr and std::exception is thrown.
// from_next / to_next are reset to the buffer starts and then advanced by the
// iconv call inside do_iconv(), whose result is returned.
utf8_codecvt_facet::result utf8_codecvt_facet::do_in(state_type &,
		extern_type const * from, extern_type const * from_end,
		extern_type const *& from_next,
		intern_type * to, intern_type * to_end,
		intern_type *& to_next) const
{
	if (in_cd_ == (iconv_t)(-1)) {
		// Use the same UCS-4 flavour as do_out() ("UCS-4LE"). A bare "UCS-4"
		// does not select little-endian (glibc and GNU libiconv treat it as
		// big-endian), so the code points stored into the intern_type buffer
		// would come out byte-swapped relative to what do_out() consumes.
		in_cd_ = iconv_open("UCS-4LE", "UTF-8");
		if (in_cd_ == (iconv_t)(-1)) {
			fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n", errno, strerror(errno));
			fflush(stderr);
			throw std::exception();
		}
	}
	// iconv works on byte counts, not element counts.
	size_t inbytesleft = (from_end - from) * sizeof(extern_type);
	size_t outbytesleft = (to_end - to) * sizeof(intern_type);
	from_next = from;
	to_next = to;
	return do_iconv(in_cd_, &from_next, &inbytesleft, reinterpret_cast<char **>(&to_next), &outbytesleft);
}
// Always 0, the value std::codecvt uses for an external encoding in which a
// character does not occupy a fixed number of bytes.
int utf8_codecvt_facet::do_encoding() const throw()
{
	int const variable_width = 0;
	return variable_width;
}
// Always false: this facet does perform conversions (see do_in / do_out).
bool utf8_codecvt_facet::do_always_noconv() const throw()
{
	bool const conversion_is_identity = false;
	return conversion_is_identity;
}
// Returns the smaller of the byte count of [from, end) and max. No actual
// conversion is attempted; the conversion-based variant below is disabled.
int utf8_codecvt_facet::do_length(state_type & /*state*/, extern_type const * from, extern_type const * end, size_t max) const
{
#if 0
	intern_type * to = new intern_type[max];
	intern_type * to_end = to + max;
	intern_type * to_next = to;
	extern_type const * from_next = from;
	do_in(state, from, end, from_next, to, to_end, to_next);
	delete[] to;
	return to_next - to;
#endif
	size_t const available = end - from;
	if (max < available)
		return max;
	return available;
}
// Upper bound on the number of external bytes per code point: UTF8 uses at
// most 6 bytes to represent one code point.
int utf8_codecvt_facet::do_max_length() const throw()
{
	int const max_bytes_per_code_point = 6;
	return max_bytes_per_code_point;
}


// ctype facet for UCS4 streams. Only widening and narrowing are implemented,
// and both are limited to the ASCII range (values below 128) because nothing
// else is needed here.
class ascii_ctype_facet : public std::ctype<lyx::char_type>
{
public:
	typedef lyx::char_type    char_type;

	explicit ascii_ctype_facet(size_t refs = 0) : std::ctype<char_type>(refs) {}

protected:
	virtual ~ascii_ctype_facet() {}

	// Widens one ASCII char; throws std::bad_cast for anything else.
	virtual char_type do_widen(char c) const
	{
		if (static_cast<unsigned char>(c) >= 128)
			throw std::bad_cast();
		return c;
	}

	// Widens [lo, hi) into dest and returns hi. Throws std::bad_cast at the
	// first non-ASCII char; elements before it have already been written.
	virtual const char* do_widen(const char* lo, const char* hi, char_type* dest) const
	{
		for (; lo < hi; ++lo, ++dest) {
			if (static_cast<unsigned char>(*lo) >= 128)
				throw std::bad_cast();
			*dest = *lo;
		}
		return hi;
	}

	// Narrows one character. ASCII values are returned as they are. For a
	// larger value, dfault is returned when wctob() reports EOF for it;
	// otherwise std::bad_cast is thrown.
	virtual char do_narrow(char_type wc, char dfault) const
	{
		if (wc >= 128) {
			if (wctob(wc) != EOF)
				throw std::bad_cast();
			return dfault;
		}
		return wc;
	}

	// Narrows [lo, hi) into dest using the same per-character rule as the
	// single-character overload, and returns hi. Elements before a throwing
	// character have already been written.
	virtual const char_type * do_narrow(const char_type * lo, const char_type * hi, char dfault, char * dest) const
	{
		for (; lo < hi; ++lo, ++dest) {
			if (*lo < 128) {
				*dest = *lo;
				continue;
			}
			if (wctob(*lo) != EOF)
				throw std::bad_cast();
			*dest = dfault;
		}
		return hi;
	}
};


// Writes a single narrow char to a lyx::char_type stream. The char is turned
// into lyx::char_type by a plain integral conversion (the stream's ctype
// facet is not consulted) and handed to put().
std::basic_ostream<lyx::char_type> & operator<<(std::basic_ostream<lyx::char_type> & os, char c)
{
	lyx::char_type const converted = lyx::char_type(c);
	return os.put(converted);
}


int main()
{
	std::locale const utf8_1(std::locale("C"), new utf8_codecvt_facet);
	std::locale const utf8(utf8_1, new ascii_ctype_facet);
//	std::cerr << "utf8 has std::codecvt<lyx::char_type, char, std::mbstate_t> facet: " << std::has_facet<std::codecvt<lyx::char_type, char, std::mbstate_t> >(utf8) << std::endl;
//	std::cerr << "utf8 has utf8_codecvt_facet facet: " << std::has_facet<utf8_codecvt_facet>(utf8) << std::endl;
//	std::cerr << "utf8 has std::ctype<lyx::char_type> facet: " << std::has_facet<std::ctype<lyx::char_type> >(utf8) << std::endl;
	std::basic_ofstream<lyx::char_type> os;
	os.imbue(utf8);
	os.open("stream.out");
	os << " abc";
	os.put(lyx::char_type('d'));
	os << '\n';
	lyx::char_type ae = 0xe4;
	lyx::char_type oe = 0xf6;
	lyx::char_type ue = 0xfc;
	lyx::char_type Ae = 0xc4;
	lyx::char_type Oe = 0xd6;
	lyx::char_type Ue = 0xdc;
	std::basic_string<lyx::char_type> s;
	s += Ae;
	s += Oe;
	s += Ue;
	os << ' '; os.put(ae); os.put(oe); os.put(ue); os << '\n';
	std::cerr << std::hex << ' ' << ae << ' ' << oe << ' ' << ue << '\n';
	os << ' ' << s << '\n';
	std::cerr << std::hex << ' ' << Ae << ' ' << Oe << ' ' << Ue << '\n';
	os << " abcd\n";
	return 0;
}

Reply via email to