Re: How to cook an omelet

Georg Baum Sun, 22 Oct 2006 00:38:12 -0700

Am Samstag, 21. Oktober 2006 19:36 schrieb Lars Gullik Bjønnes:
> Discussions are made and concluded in the kitchen, those not able to
> get out of the living room get no say.
> Especially not if they do not help with providing solutions instead
> of problems.


Why should we provide a solution when you have told us several times to
ignore LaTeX limitations and it has been announced that no solution will be
allowed? You should know both Jürgen and me well enough to understand that
we are not going to simply demand solutions without helping to find them.

For your information, I attach here the patch I was working on yesterday
before the freeze announcement. It already works for the case where you have
only one encoding in the document. I also had an idea for how to treat
multiple encodings that would need a similar amount of changes. Then you
would need to add a utf8 encoding to the list (and all the numbers in
lib/encodings can probably be removed), some error handling for the case
where iconv fails, and then you can use every encoding you could use in
1.4 + utf8.


Georg

Index: src/encoding.h
===================================================================
--- src/encoding.h	(Revision 15452)
+++ src/encoding.h	(Arbeitskopie)
@@ -29,8 +29,9 @@ public:
 	///
 	Encoding() {}
 	///
-	Encoding(std::string const & n, std::string const & l, Uchar const * e)
-		: Name_(n), LatexName_(l) {
+	Encoding(std::string const & n, std::string const & l,
+	         std::string const & i, Uchar const * e)
+		: Name_(n), LatexName_(l), iconvName_(i) {
 		for (int i = 0; i < 256; ++i)
 			encoding_table[i] = e[i];
 	}
@@ -43,6 +44,10 @@ public:
 		return LatexName_;
 	}
 	///
+	std::string const & iconvName() const {
+		return iconvName_;
+	}
+	///
 	Uchar ucs(unsigned char c) const {
 		return encoding_table[c];
 	}
@@ -52,6 +57,8 @@ private:
 	///
 	std::string LatexName_;
 	///
+	std::string iconvName_;
+	///
 	Uchar encoding_table[256];
 };
 
Index: src/buffer.C
===================================================================
--- src/buffer.C	(Revision 15452)
+++ src/buffer.C	(Arbeitskopie)
@@ -819,10 +819,7 @@ void Buffer::makeLaTeXFile(string const 
 {
 	lyxerr[Debug::LATEX] << "makeLaTeXFile..." << endl;
 
-	// FIXME UNICODE
-	// This creates an utf8 encoded file, but the inputenc commands
-	// specify other encodings
-	odocfstream ofs;
+	odocfstream ofs(params().language->encoding()->iconvName());
 	if (!openFileWrite(ofs, fname))
 		return;
 
Index: src/bufferparams.C
===================================================================
--- src/bufferparams.C	(Revision 15452)
+++ src/bufferparams.C	(Arbeitskopie)
@@ -835,32 +835,26 @@ bool BufferParams::writeLaTeX(odocstream
 		texrow.newline();
 	}
 
-	// TODO: Some people want to support more encodings than UTF-8. They can have a field day around here
-	if (true) {
-		os << "\\usepackage[utf8]{inputenc}\n";
+	if (inputenc == "auto") {
+		string const doc_encoding =
+			language->encoding()->latexName();
+
+		// Create a list with all the input encodings used
+		// in the document
+		std::set<string> encodings =
+			features.getEncodingSet(doc_encoding);
+
+		os << "\\usepackage[";
+		std::set<string>::const_iterator it = encodings.begin();
+		std::set<string>::const_iterator const end = encodings.end();
+		for (; it != end; ++it)
+			os << from_ascii(*it) << ',';
+		os << from_ascii(doc_encoding) << "]{inputenc}\n";
+		texrow.newline();
+	} else if (inputenc != "default") {
+		os << "\\usepackage[" << from_ascii(inputenc)
+		   << "]{inputenc}\n";
 		texrow.newline();
-	} else {
-		if (inputenc == "auto") {
-			string const doc_encoding =
-				language->encoding()->latexName();
-
-			// Create a list with all the input encodings used
-			// in the document
-			std::set<string> encodings =
-				features.getEncodingSet(doc_encoding);
-
-			os << "\\usepackage[";
-			std::set<string>::const_iterator it = encodings.begin();
-			std::set<string>::const_iterator const end = encodings.end();
-			for (; it != end; ++it)
-				os << from_ascii(*it) << ',';
-			os << from_ascii(doc_encoding) << "]{inputenc}\n";
-			texrow.newline();
-		} else if (inputenc != "default") {
-			os << "\\usepackage[" << from_ascii(inputenc)
-			   << "]{inputenc}\n";
-			texrow.newline();
-		}
 	}
 
 	if (use_geometry || nonstandard_papersize) {
Index: src/support/docstream.C
===================================================================
--- src/support/docstream.C	(Revision 15452)
+++ src/support/docstream.C	(Arbeitskopie)
@@ -22,6 +22,8 @@
 using lyx::ucs4_codeset;
 using lyx::ucs2_codeset;
 
+using std::string;
+
 
 namespace {
 
@@ -31,51 +33,52 @@ char const * utf8_codeset = "UTF-8";
 // lyxerr in the future.
 
 
-class utf8_codecvt_facet_exception : public std::exception {
+class iconv_codecvt_facet_exception : public std::exception {
 public:
-	virtual ~utf8_codecvt_facet_exception() throw() {}
+	virtual ~iconv_codecvt_facet_exception() throw() {}
 	virtual const char* what() const throw()
 	{
-		return "iconv problem in utf8_codecvt_facet initialization";
+		return "iconv problem in iconv_codecvt_facet initialization";
 	}
 };
 
 
 /// codecvt facet for conversion of UCS4 (internal representation) to UTF8
 /// (external representation) or vice versa
-class utf8_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t>
+class iconv_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t>
 {
 	typedef std::codecvt<lyx::char_type, char, std::mbstate_t> base;
 public:
 	/// Constructor. You have to specify with \p inout whether you want
 	/// to use this facet only for input, only for output or for both.
-	explicit utf8_codecvt_facet(std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
+	explicit iconv_codecvt_facet(string const & encoding = "UTF-8",
+			std::ios_base::openmode inout = std::ios_base::in | std::ios_base::out,
 			size_t refs = 0)
-		: base(refs)
+		: base(refs), utf8_(encoding == "UTF-8")
 	{
 		if (inout & std::ios_base::in) {
-			in_cd_ = iconv_open(ucs4_codeset, utf8_codeset);
+			in_cd_ = iconv_open(ucs4_codeset, encoding.c_str());
 			if (in_cd_ == (iconv_t)(-1)) {
 				fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n",
 				        errno, strerror(errno));
 				fflush(stderr);
-				throw utf8_codecvt_facet_exception();
+				throw iconv_codecvt_facet_exception();
 			}
 		} else
 			in_cd_ = (iconv_t)(-1);
 		if (inout & std::ios_base::out) {
-			out_cd_ = iconv_open(utf8_codeset, ucs4_codeset);
+			out_cd_ = iconv_open(encoding.c_str(), ucs4_codeset);
 			if (out_cd_ == (iconv_t)(-1)) {
 				fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n",
 				        errno, strerror(errno));
 				fflush(stderr);
-				throw utf8_codecvt_facet_exception();
+				throw iconv_codecvt_facet_exception();
 			}
 		} else
 			out_cd_ = (iconv_t)(-1);
 	}
 protected:
-	virtual ~utf8_codecvt_facet()
+	virtual ~iconv_codecvt_facet()
 	{
 		if (in_cd_ != (iconv_t)(-1))
 			if (iconv_close(in_cd_) == -1) {
@@ -155,8 +158,10 @@ protected:
 	}
 	virtual int do_max_length() const throw()
 	{
-		// UTF8 uses at most 6 bytes to represent one code point
-		return 6;
+		// UTF8 uses at most 6 bytes to represent one UCS4 code point.
+		// All other encodings encode one UCS4 code point in one byte
+		// (and can therefore only encode a subset of UCS4)
+		return utf8_ ? 6 : 1;
 	}
 private:
 	/// Do the actual conversion. The interface is equivalent to that of
@@ -186,6 +191,8 @@ private:
 	}
 	iconv_t in_cd_;
 	iconv_t out_cd_;
+	/// Is the narrow encoding UTF8?
+	bool utf8_;
 };
 
 } // namespace anon
@@ -197,7 +204,7 @@ namespace lyx {
 idocfstream::idocfstream() : base()
 {
 	std::locale global;
-	std::locale locale(global, new utf8_codecvt_facet(in));
+	std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
 	imbue(locale);
 }
 
@@ -207,26 +214,27 @@ idocfstream::idocfstream(const char* s, 
 {
 	// We must imbue the stream before openening the file
 	std::locale global;
-	std::locale locale(global, new utf8_codecvt_facet(in));
+	std::locale locale(global, new iconv_codecvt_facet(utf8_codeset, in));
 	imbue(locale);
 	open(s, mode);
 }
 
 
-odocfstream::odocfstream() : base()
+odocfstream::odocfstream(string const & encoding) : base()
 {
 	std::locale global;
-	std::locale locale(global, new utf8_codecvt_facet(out));
+	std::locale locale(global, new iconv_codecvt_facet(encoding, out));
 	imbue(locale);
 }
 
-	
-odocfstream::odocfstream(const char* s, std::ios_base::openmode mode)
+
+odocfstream::odocfstream(const char* s, std::ios_base::openmode mode,
+                         string const & encoding)
 	: base()
 {
 	// We must imbue the stream before openening the file
 	std::locale global;
-	std::locale locale(global, new utf8_codecvt_facet(out));
+	std::locale locale(global, new iconv_codecvt_facet(encoding, out));
 	imbue(locale);
 	open(s, mode);
 }
@@ -236,7 +244,7 @@ odocfstream::odocfstream(const char* s, 
 #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)
 // We get undefined references to these virtual methods. This looks like
 // a bug in gcc. The implementation here does not do anything useful, since
-// it is overriden in utf8_codecvt_facet.
+// it is overriden in iconv_codecvt_facet.
 namespace std {
 template<> codecvt<lyx::char_type, char, mbstate_t>::result
 codecvt<lyx::char_type, char, mbstate_t>::do_out(mbstate_t &, const lyx::char_type *, const lyx::char_type *, const lyx::char_type *&,
Index: src/support/docstream.h
===================================================================
--- src/support/docstream.h	(Revision 15452)
+++ src/support/docstream.h	(Arbeitskopie)
@@ -45,14 +45,15 @@ public:
 	~idocfstream() {}
 };
 
-/// File stream for writing UTF8-encoded files with automatic conversion from
-/// UCS4.
+/// File stream for writing files in 8bit encoding \p encoding with automatic
+/// conversion from UCS4.
 class odocfstream : public std::basic_ofstream<char_type> {
 	typedef std::basic_ofstream<char_type> base;
 public:
-	odocfstream();
+	odocfstream(std::string const & encoding = "UTF-8");
 	explicit odocfstream(const char* s,
-		std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc);
+		std::ios_base::openmode mode = std::ios_base::out|std::ios_base::trunc,
+		std::string const & encoding = "UTF-8");
 	~odocfstream() {}
 };
 
Index: src/encoding.C
===================================================================
--- src/encoding.C	(Revision 15452)
+++ src/encoding.C	(Arbeitskopie)
@@ -307,12 +307,12 @@ Encoding const * Encodings::getEncoding(
 
 Encodings::Encodings()
 {
-	encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", tab_iso8859_1);
+	encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", "ISO-8859-1", tab_iso8859_1);
 	symbol_encoding_ =
 #ifdef USE_UNICODE_FOR_SYMBOLS
-		Encoding("symbol", "", tab_symbol);
+		Encoding("symbol", "", "", tab_symbol);
 #else
-		Encoding("symbol", "", tab_iso8859_1);
+		Encoding("symbol", "", "", tab_iso8859_1);
 #endif
 }
 
@@ -339,6 +339,8 @@ void Encodings::read(string const & file
 			string const name = lex.getString();
 			lex.next();
 			string const latexname = lex.getString();
+			lex.next();
+			string const iconvname = lex.getString();
 			lyxerr[Debug::INFO] << "Reading encoding " << name << endl;
 			Uchar table[256];
 			for (unsigned int i = 0; i < 256; ++i) {
@@ -346,7 +348,7 @@ void Encodings::read(string const & file
 				string const tmp = lex.getString();
 				table[i] = ::strtol(tmp.c_str(), 0 , 16);
 			}
-			encodinglist[name] = Encoding(name, latexname, table);
+			encodinglist[name] = Encoding(name, latexname, iconvname, table);
 			if (lex.lex() != et_end)
 				lex.printError("Encodings::read: "
 					       "missing end");
Index: lib/encodings
===================================================================
--- lib/encodings	(Revision 15452)
+++ lib/encodings	(Arbeitskopie)
@@ -1,4 +1,4 @@
-Encoding iso8859-2 latin2
+Encoding iso8859-2 latin2 ISO-8859-2
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -24,7 +24,7 @@ Encoding iso8859-2 latin2
 End
 
 
-Encoding iso8859-3 latin3
+Encoding iso8859-3 latin3 ISO-8859-3
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -50,7 +50,7 @@ Encoding iso8859-3 latin3
 End
 
 
-Encoding iso8859-4 latin4
+Encoding iso8859-4 latin4 ISO-8859-4
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -76,7 +76,7 @@ Encoding iso8859-4 latin4
 End
 
 
-Encoding iso8859-5 iso88595
+Encoding iso8859-5 iso88595 ISO-8859-5
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -102,7 +102,7 @@ Encoding iso8859-5 iso88595
 End
 
 
-Encoding iso8859-6 unknown
+Encoding iso8859-6 unknown ISO-8859-6
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -134,7 +134,7 @@ Encoding iso8859-6 unknown
 End
 
 
-Encoding iso8859-7 iso-8859-7
+Encoding iso8859-7 iso-8859-7 ISO-8859-7
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -160,7 +160,7 @@ Encoding iso8859-7 iso-8859-7
 End
 
 
-Encoding iso8859-9 latin5
+Encoding iso8859-9 latin5 ISO-8859-9
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -186,7 +186,7 @@ Encoding iso8859-9 latin5
 End
 
 
-Encoding iso8859-13 l7xenc
+Encoding iso8859-13 l7xenc ISO-8859-13
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -212,7 +212,7 @@ Encoding iso8859-13 l7xenc
 End
 
 
-Encoding iso8859-15 latin9
+Encoding iso8859-15 latin9 ISO-8859-15
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -238,7 +238,7 @@ Encoding iso8859-15 latin9
 End
 
 
-Encoding cp1255 cp1255
+Encoding cp1255 cp1255 CP1255
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -264,7 +264,7 @@ Encoding cp1255 cp1255
 End
 
 
-Encoding cp1251 cp1251
+Encoding cp1251 cp1251 CP1251
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -292,7 +292,7 @@ Encoding cp1251 cp1251
 End
 
 
-Encoding koi8 koi8-r
+Encoding koi8 koi8-r KOI8-R
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -320,7 +320,7 @@ Encoding koi8 koi8-r
 End
 
 
-Encoding koi8-u koi8-u
+Encoding koi8-u koi8-u KOI8-U
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -348,7 +348,7 @@ Encoding koi8-u koi8-u
 End
 
 
-Encoding tis620-0 unknown
+Encoding tis620-0 unknown TIS-620-0
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
@@ -373,7 +373,7 @@ Encoding tis620-0 unknown
    0e58 0e59 0e5a 0e5b 00fc 00fd 00fe 00ff
 End
 
-Encoding pt154 pt154
+Encoding pt154 pt154 PT154
    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
    20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f

Re: How to cook an omelet

Reply via email to