Am Samstag, 21. Oktober 2006 19:36 schrieb Lars Gullik Bjønnes: > Discussions are made and concluded in the kitchen, those not able to > get out of the living room get no say. > Especially not if they do not help with providing solutions instead > of problems.
Why should we provide a solution when you tell us several times to ignore LaTeX limitations and it is announced that no solution will be allowed? You should know both Jürgen and me well enough to understand that we are not going to simply demand solutions without helping to find them. For information I attach here the patch I was working on yesterday before the freeze announcement. It already works for the case where you have only one encoding in the document. I also had an idea how to treat multiple encodings that would need a similar amount of changes. Then you would need to add a utf8 encoding to the list (and all the numbers in lib/encodings can probably be removed), some error handling for the case where iconv fails, and then you can use every encoding you could use in 1.4 + utf8. Georg
Index: src/encoding.h =================================================================== --- src/encoding.h (Revision 15452) +++ src/encoding.h (Arbeitskopie) @@ -29,8 +29,9 @@ public: /// Encoding() {} /// - Encoding(std::string const & n, std::string const & l, Uchar const * e) - : Name_(n), LatexName_(l) { + Encoding(std::string const & n, std::string const & l, + std::string const & i, Uchar const * e) + : Name_(n), LatexName_(l), iconvName_(i) { for (int i = 0; i < 256; ++i) encoding_table[i] = e[i]; } @@ -43,6 +44,10 @@ public: return LatexName_; } /// + std::string const & iconvName() const { + return iconvName_; + } + /// Uchar ucs(unsigned char c) const { return encoding_table[c]; } @@ -52,6 +57,8 @@ private: /// std::string LatexName_; /// + std::string iconvName_; + /// Uchar encoding_table[256]; }; Index: src/buffer.C =================================================================== --- src/buffer.C (Revision 15452) +++ src/buffer.C (Arbeitskopie) @@ -819,10 +819,7 @@ void Buffer::makeLaTeXFile(string const { lyxerr[Debug::LATEX] << "makeLaTeXFile..." << endl; - // FIXME UNICODE - // This creates an utf8 encoded file, but the inputenc commands - // specify other encodings - odocfstream ofs; + odocfstream ofs(params().language->encoding()->iconvName()); if (!openFileWrite(ofs, fname)) return; Index: src/bufferparams.C =================================================================== --- src/bufferparams.C (Revision 15452) +++ src/bufferparams.C (Arbeitskopie) @@ -835,32 +835,26 @@ bool BufferParams::writeLaTeX(odocstream texrow.newline(); } - // TODO: Some people want to support more encodings than UTF-8. 
They can have a field day around here - if (true) { - os << "\\usepackage[utf8]{inputenc}\n"; + if (inputenc == "auto") { + string const doc_encoding = + language->encoding()->latexName(); + + // Create a list with all the input encodings used + // in the document + std::set<string> encodings = + features.getEncodingSet(doc_encoding); + + os << "\\usepackage["; + std::set<string>::const_iterator it = encodings.begin(); + std::set<string>::const_iterator const end = encodings.end(); + for (; it != end; ++it) + os << from_ascii(*it) << ','; + os << from_ascii(doc_encoding) << "]{inputenc}\n"; + texrow.newline(); + } else if (inputenc != "default") { + os << "\\usepackage[" << from_ascii(inputenc) + << "]{inputenc}\n"; texrow.newline(); - } else { - if (inputenc == "auto") { - string const doc_encoding = - language->encoding()->latexName(); - - // Create a list with all the input encodings used - // in the document - std::set<string> encodings = - features.getEncodingSet(doc_encoding); - - os << "\\usepackage["; - std::set<string>::const_iterator it = encodings.begin(); - std::set<string>::const_iterator const end = encodings.end(); - for (; it != end; ++it) - os << from_ascii(*it) << ','; - os << from_ascii(doc_encoding) << "]{inputenc}\n"; - texrow.newline(); - } else if (inputenc != "default") { - os << "\\usepackage[" << from_ascii(inputenc) - << "]{inputenc}\n"; - texrow.newline(); - } } if (use_geometry || nonstandard_papersize) { Index: src/support/docstream.C =================================================================== --- src/support/docstream.C (Revision 15452) +++ src/support/docstream.C (Arbeitskopie) @@ -22,6 +22,8 @@ using lyx::ucs4_codeset; using lyx::ucs2_codeset; +using std::string; + namespace { @@ -31,51 +33,52 @@ char const * utf8_codeset = "UTF-8"; // lyxerr in the future. 
-class utf8_codecvt_facet_exception : public std::exception { +class iconv_codecvt_facet_exception : public std::exception { public: - virtual ~utf8_codecvt_facet_exception() throw() {} + virtual ~iconv_codecvt_facet_exception() throw() {} virtual const char* what() const throw() { - return "iconv problem in utf8_codecvt_facet initialization"; + return "iconv problem in iconv_codecvt_facet initialization"; } }; /// codecvt facet for conversion of UCS4 (internal representation) to UTF8 /// (external representation) or vice versa -class utf8_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t> +class iconv_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t> { typedef std::codecvt<lyx::char_type, char, std::mbstate_t> base; public: /// Constructor. You have to specify with \p inout whether you want /// to use this facet only for input, only for output or for both. - explicit utf8_codecvt_facet(std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out, + explicit iconv_codecvt_facet(string const & encoding = "UTF-8", + std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out, size_t refs = 0) - : base(refs) + : base(refs), utf8_(encoding == "UTF-8") { if (inout & std::ios_base::in) { - in_cd_ = iconv_open(ucs4_codeset, utf8_codeset); + in_cd_ = iconv_open(ucs4_codeset, encoding.c_str()); if (in_cd_ == (iconv_t)(-1)) { fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n", errno, strerror(errno)); fflush(stderr); - throw utf8_codecvt_facet_exception(); + throw iconv_codecvt_facet_exception(); } } else in_cd_ = (iconv_t)(-1); if (inout & std::ios_base::out) { - out_cd_ = iconv_open(utf8_codeset, ucs4_codeset); + out_cd_ = iconv_open(encoding.c_str(), ucs4_codeset); if (out_cd_ == (iconv_t)(-1)) { fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n", errno, strerror(errno)); fflush(stderr); - throw utf8_codecvt_facet_exception(); + throw iconv_codecvt_facet_exception(); } } 
else out_cd_ = (iconv_t)(-1); } protected: - virtual ~utf8_codecvt_facet() + virtual ~iconv_codecvt_facet() { if (in_cd_ != (iconv_t)(-1)) if (iconv_close(in_cd_) == -1) { @@ -155,8 +158,10 @@ protected: } virtual int do_max_length() const throw() { - // UTF8 uses at most 6 bytes to represent one code point - return 6; + // UTF8 uses at most 6 bytes to represent one UCS4 code point. + // All other encodings encode one UCS4 code point in one byte + // (and can therefore only encode a subset of UCS4) + return utf8_ ? 6 : 1; } private: /// Do the actual conversion. The interface is equivalent to that of @@ -186,6 +191,8 @@ private: } iconv_t in_cd_; iconv_t out_cd_; + /// Is the narrow encoding UTF8? + bool utf8_; }; } // namespace anon @@ -197,7 +204,7 @@ namespace lyx { idocfstream::idocfstream() : base() { std::locale global; - std::locale locale(global, new utf8_codecvt_facet(in)); + std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in)); imbue(locale); } @@ -207,26 +214,27 @@ idocfstream::idocfstream(const char* s, { // We must imbue the stream before openening the file std::locale global; - std::locale locale(global, new utf8_codecvt_facet(in)); + std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in)); imbue(locale); open(s, mode); } -odocfstream::odocfstream() : base() +odocfstream::odocfstream(string const & encoding) : base() { std::locale global; - std::locale locale(global, new utf8_codecvt_facet(out)); + std::locale locale(global, new iconv_codecvt_facet(encoding, out)); imbue(locale); } - -odocfstream::odocfstream(const char* s, std::ios_base::openmode mode) + +odocfstream::odocfstream(const char* s, std::ios_base::openmode mode, + string const & encoding) : base() { // We must imbue the stream before openening the file std::locale global; - std::locale locale(global, new utf8_codecvt_facet(out)); + std::locale locale(global, new iconv_codecvt_facet(encoding, out)); imbue(locale); open(s, mode); } @@ -236,7 +244,7 @@ 
odocfstream::odocfstream(const char* s, #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__) // We get undefined references to these virtual methods. This looks like // a bug in gcc. The implementation here does not do anything useful, since -// it is overriden in utf8_codecvt_facet. +// it is overriden in iconv_codecvt_facet. namespace std { template<> codecvt<lyx::char_type, char, mbstate_t>::result codecvt<lyx::char_type, char, mbstate_t>::do_out(mbstate_t &, const lyx::char_type *, const lyx::char_type *, const lyx::char_type *&, Index: src/support/docstream.h =================================================================== --- src/support/docstream.h (Revision 15452) +++ src/support/docstream.h (Arbeitskopie) @@ -45,14 +45,15 @@ public: ~idocfstream() {} }; -/// File stream for writing UTF8-encoded files with automatic conversion from -/// UCS4. +/// File stream for writing files in 8bit encoding \p encoding with automatic +/// conversion from UCS4. class odocfstream : public std::basic_ofstream<char_type> { typedef std::basic_ofstream<char_type> base; public: - odocfstream(); + odocfstream(std::string const & encoding = "UTF-8"); explicit odocfstream(const char* s, - std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc); + std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc, + std::string const & encoding = "UTF-8"); ~odocfstream() {} }; Index: src/encoding.C =================================================================== --- src/encoding.C (Revision 15452) +++ src/encoding.C (Arbeitskopie) @@ -307,12 +307,12 @@ Encoding const * Encodings::getEncoding( Encodings::Encodings() { - encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", tab_iso8859_1); + encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", "ISO-8859-1", tab_iso8859_1); symbol_encoding_ = #ifdef USE_UNICODE_FOR_SYMBOLS - Encoding("symbol", "", tab_symbol); + Encoding("symbol", "", "", tab_symbol); #else - 
Encoding("symbol", "", tab_iso8859_1); + Encoding("symbol", "", "", tab_iso8859_1); #endif } @@ -339,6 +339,8 @@ void Encodings::read(string const & file string const name = lex.getString(); lex.next(); string const latexname = lex.getString(); + lex.next(); + string const iconvname = lex.getString(); lyxerr[Debug::INFO] << "Reading encoding " << name << endl; Uchar table[256]; for (unsigned int i = 0; i < 256; ++i) { @@ -346,7 +348,7 @@ void Encodings::read(string const & file string const tmp = lex.getString(); table[i] = ::strtol(tmp.c_str(), 0 , 16); } - encodinglist[name] = Encoding(name, latexname, table); + encodinglist[name] = Encoding(name, latexname, iconvname, table); if (lex.lex() != et_end) lex.printError("Encodings::read: " "missing end"); Index: lib/encodings =================================================================== --- lib/encodings (Revision 15452) +++ lib/encodings (Arbeitskopie) @@ -1,4 +1,4 @@ -Encoding iso8859-2 latin2 +Encoding iso8859-2 latin2 ISO-8859-2 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -24,7 +24,7 @@ Encoding iso8859-2 latin2 End -Encoding iso8859-3 latin3 +Encoding iso8859-3 latin3 ISO-8859-3 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -50,7 +50,7 @@ Encoding iso8859-3 latin3 End -Encoding iso8859-4 latin4 +Encoding iso8859-4 latin4 ISO-8859-4 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -76,7 +76,7 @@ Encoding iso8859-4 latin4 End -Encoding iso8859-5 iso88595 +Encoding iso8859-5 iso88595 ISO-8859-5 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -102,7 +102,7 @@ Encoding iso8859-5 iso88595 End -Encoding iso8859-6 
unknown +Encoding iso8859-6 unknown ISO-8859-6 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -134,7 +134,7 @@ Encoding iso8859-6 unknown End -Encoding iso8859-7 iso-8859-7 +Encoding iso8859-7 iso-8859-7 ISO-8859-7 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -160,7 +160,7 @@ Encoding iso8859-7 iso-8859-7 End -Encoding iso8859-9 latin5 +Encoding iso8859-9 latin5 ISO-8859-9 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -186,7 +186,7 @@ Encoding iso8859-9 latin5 End -Encoding iso8859-13 l7xenc +Encoding iso8859-13 l7xenc ISO-8859-13 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -212,7 +212,7 @@ Encoding iso8859-13 l7xenc End -Encoding iso8859-15 latin9 +Encoding iso8859-15 latin9 ISO-8859-15 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -238,7 +238,7 @@ Encoding iso8859-15 latin9 End -Encoding cp1255 cp1255 +Encoding cp1255 cp1255 CP1255 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -264,7 +264,7 @@ Encoding cp1255 cp1255 End -Encoding cp1251 cp1251 +Encoding cp1251 cp1251 CP1251 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -292,7 +292,7 @@ Encoding cp1251 cp1251 End -Encoding koi8 koi8-r +Encoding koi8 koi8-r KOI8-R 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -320,7 
+320,7 @@ Encoding koi8 koi8-r End -Encoding koi8-u koi8-u +Encoding koi8-u koi8-u KOI8-U 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -348,7 +348,7 @@ Encoding koi8-u koi8-u End -Encoding tis620-0 unknown +Encoding tis620-0 unknown TIS-620-0 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f @@ -373,7 +373,7 @@ Encoding tis620-0 unknown 0e58 0e59 0e5a 0e5b 00fc 00fd 00fe 00ff End -Encoding pt154 pt154 +Encoding pt154 pt154 PT154 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f