Am Montag, 14. August 2006 16:30 schrieb Georg Baum: > I think we should change as little as possible. I propose the following > (slightly different than the existing lyx2lyx conversion): > > Old: \inputenc (in the LyX file) determines the encoding of the LyX file and > the TeX file > > New: \inputenc determines only the encoding of the TeX file, the LyX file is > always in utf-8. That would mean that we don't change the value of > \inputenc in the lyx2lyx conversion.
This patch implements that. I plan to commit it tomorrow; please test. The LaTeX output is not touched at all, but it should now be possible to open old documents and also to convert them back, provided they contain no characters that cannot be represented in the old encoding. I especially like how convert_utf8() in lyx2lyx looks now. José definitely prepared this change well! Georg
Index: src/buffer.C =================================================================== --- src/buffer.C (Revision 14676) +++ src/buffer.C (Arbeitskopie) @@ -146,7 +146,7 @@ extern BufferList bufferlist; namespace { -int const LYX_FORMAT = 248; +int const LYX_FORMAT = 249; } // namespace anon Index: src/bufferparams.h =================================================================== --- src/bufferparams.h (Revision 14676) +++ src/bufferparams.h (Arbeitskopie) @@ -175,7 +175,15 @@ public: /// BranchList: BranchList & branchlist(); BranchList const & branchlist() const; - /// + /** + * The input encoding for LaTeX. This can be one of + * - auto: find out the input encoding from the used languages + * - default: Don't load the inputenc package and hope that it will + * work (unlikely) + * - any encoding supported by the inputenc package + * The encoding of the LyX file is always utf8 and has nothing to + * do with this setting. + */ std::string inputenc; /// std::string preamble; Index: lib/lyx2lyx/LyX.py =================================================================== --- lib/lyx2lyx/LyX.py (Revision 14676) +++ lib/lyx2lyx/LyX.py (Arbeitskopie) @@ -73,7 +73,7 @@ format_relation = [("0_06", [200], ge ("1_2", [220], generate_minor_versions("1.2" , 4)), ("1_3", [221], generate_minor_versions("1.3" , 7)), ("1_4", range(222,246), generate_minor_versions("1.4" , 3)), - ("1_5", range(246,249), generate_minor_versions("1.5" , 0))] + ("1_5", range(246,250), generate_minor_versions("1.5" , 0))] def formats_list(): @@ -108,7 +108,9 @@ def trim_eol(line): return line[:-1] -def get_encoding(language, inputencoding): +def get_encoding(language, inputencoding, format): + if format > 248: + return "utf8" from lyx2lyx_lang import lang if inputencoding == "auto": return lang[language][3] @@ -224,7 +226,7 @@ class LyX_Base: self.format = self.read_format() self.language = get_value(self.header, "\\language", 0, default = "english") self.inputencoding = get_value(self.header, 
"\\inputencoding", 0, default = "auto") - self.encoding = get_encoding(self.language, self.inputencoding) + self.encoding = get_encoding(self.language, self.inputencoding, self.format) self.initial_version = self.read_version() # Second pass over header and preamble, now we know the file encoding @@ -246,7 +248,7 @@ class LyX_Base: self.set_version() self.set_format() if self.encoding == "auto": - self.encoding = get_encoding(self.language, self.encoding) + self.encoding = get_encoding(self.language, self.encoding, self.format) if self.preamble: i = find_token(self.header, '\\textclass', 0) + 1 Index: lib/lyx2lyx/lyx_1_5.py =================================================================== --- lib/lyx2lyx/lyx_1_5.py (Revision 14676) +++ lib/lyx2lyx/lyx_1_5.py (Arbeitskopie) @@ -21,6 +21,7 @@ import re from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value +from LyX import get_encoding #################################################################### @@ -217,13 +218,18 @@ def revert_booktabs(document): def convert_utf8(document): + document.encoding = "utf8" + + +def revert_utf8(document): i = find_token(document.header, "\\inputencoding", 0) if i == -1: - document.header.append("\\inputencoding utf-8") - else: - document.header[i] = "\\inputencoding utf-8" - document.inputencoding = "utf-8" - document.encoding = "utf-8" + document.header.append("\\inputencoding auto") + elif get_value(document.header, "\\inputencoding", i) == "utf8": + document.header[i] = "\\inputencoding auto" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + document.encoding = get_encoding(document.language, document.inputencoding, 248) + ## # Conversion hub @@ -232,11 +238,11 @@ def convert_utf8(document): supported_versions = ["1.5.0","1.5"] convert = [[246, []], [247, [convert_font_settings]], - [248, []] - # ,[xxx, [convert_utf8]] uncomment to support convertion to utf-8 - ] + [248, []], + [249, [convert_utf8]]] -revert = 
[[247, [revert_booktabs]], +revert = [[248, [revert_utf8]], + [247, [revert_booktabs]], [246, [revert_font_settings]], [245, [revert_framed]]] Index: development/FORMAT =================================================================== --- development/FORMAT (Revision 14676) +++ development/FORMAT (Arbeitskopie) @@ -1,6 +1,21 @@ LyX file-format changes ----------------------- +2006-08-14 Georg Baum <[EMAIL PROTECTED]> + + * format incremented to 249: Unicode + + LyX documents are now always encoded in utf8. The value of + \inputencoding does now only determine the encoding of the created + LaTeX file. + Up to format 248 the value of \inputencoding did also determine the + encoding of the LyX file: + + \inputencoding LyX file encoding + auto as determined by the document language + default latin1 + everything else as determined by \inputencoding + 2006-07-03 Georg Baum <[EMAIL PROTECTED]> * format incremented to 248: Basic booktabs support