I am in the process of converting a bigger document with tex2lyx. In order
to make it work, I had to do the following changes (see attached patch):
- add \text{} and \fbox{} support to math.C
- allow parsing of partial documents (for inclusion with \input)
- steal commandline switch -c from reLyX to support the previous point
- add support for lengths with comma (4,5cm instead of 4.5cm) to minipage
environments
- add support for some math environments like displaymath
- add support for \bibliography command
The document already translates quite well, but a remaining problem is that
unknown LaTeX commands "\foo" get translated to "\foo " or even "\foo  "
(i.e. with one or even two trailing spaces). This also breaks commands like
\hspace*{}. This happens because Token::asInput() adds an extra space, and
parse_text() adds another one for unknown tokens.
Why is the space added in Token::asInput()? Should something else (what?) be
used instead where the space hurts?
The patch is work in progress, partly unfinished and contains some ugly
things like Token::lineno_. I'll send a polished version when everything
works.
Georg
Index: src/tex2lyx/math.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/math.C,v
retrieving revision 1.10
diff -u -p -r1.10 math.C
--- src/tex2lyx/math.C 2003/09/09 18:27:24 1.10
+++ src/tex2lyx/math.C 2003/10/04 15:57:59
@@ -27,7 +27,10 @@ bool is_math_env(string const & name)
{
static char const * known_math_envs[] = { "equation", "equation*",
"eqnarray", "eqnarray*", "align", "align*", "gather", "gather*",
- "multline", "multline*", 0};
+ "multline", "multline*", "math", "displaymath", "flalign", "flalign*",
+ // These require extra args
+ // "alignat", "alignat*", "xalignat", "xalignat*", "xxalignat",
+ 0};
for (char const ** what = known_math_envs; *what; ++what)
if (*what == name)
@@ -190,14 +193,15 @@ void parse_math(Parser & p, ostream & os
}
else if (t.cs() == "textrm" || t.cs() == "textsf" || t.cs() == "textbf"
- || t.cs() == "texttt" || t.cs() == "textsc") {
+ || t.cs() == "texttt" || t.cs() == "textsc"
+ || t.cs() == "text" || t.cs() == "intertext") {
os << '\\' << t.cs() << '{';
parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE);
os << '}';
}
- else if (t.cs() == "mbox") {
- os << "\\mbox{";
+ else if (t.cs() == "mbox" || t.cs() == "fbox") {
+ os << "\\" << t.cs() << '{';
parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE);
os << '}';
}
Index: src/tex2lyx/preamble.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/preamble.C,v
retrieving revision 1.15
diff -u -p -r1.15 preamble.C
--- src/tex2lyx/preamble.C 2003/09/08 00:33:41 1.15
+++ src/tex2lyx/preamble.C 2003/10/04 15:58:00
@@ -31,6 +31,8 @@ using std::ostream;
using std::ostringstream;
using std::string;
using std::vector;
+using std::cerr;
+using std::endl;
using lyx::support::LibFileSearch;
@@ -173,14 +175,47 @@ void end_preamble(ostream & os, LyXTextC
} // anonymous namespace
-LyXTextClass const parse_preamble(Parser & p, ostream & os)
+LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & forceclass)
{
// initialize fixed types
special_columns['D'] = 3;
+ bool is_full_document = false;
+ // determine whether this is a full document or a fragment for inclusion
while (p.good()) {
Token const & t = p.get_token();
+ //
+ // cat codes
+ //
+ if (t.cat() == catLetter ||
+ t.cat() == catSpace ||
+ t.cat() == catSuper ||
+ t.cat() == catSub ||
+ t.cat() == catOther ||
+ t.cat() == catMath ||
+ t.cat() == catActive ||
+ t.cat() == catBegin ||
+ t.cat() == catEnd ||
+ t.cat() == catAlign ||
+ t.cat() == catNewline ||
+ t.cat() == catParameter)
+ ;
+
+ else if (t.cat() == catComment)
+ handle_comment(p);
+
+ else if (t.cs() == "documentclass") {
+ is_full_document = true;
+ break;
+ }
+
+ }
+ p.reset();
+
+ while (is_full_document && p.good()) {
+ Token const & t = p.get_token();
+
#ifdef FILEDEBUG
cerr << "t: " << t << " flags: " << flags << "\n";
//cell->dump();
@@ -351,8 +386,17 @@ LyXTextClass const parse_preamble(Parser
h_preamble << '\\' << t.cs() << ' ';
}
+ // Force textclass if the user wanted it
+ if(forceclass.size()) {
+ h_textclass = forceclass;
+ }
+ string layoutfilename = LibFileSearch("layouts", h_textclass, "layout");
+ if(!layoutfilename.size()) {
+ cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." << endl;
+ exit(1);
+ }
LyXTextClass textclass;
- textclass.Read(LibFileSearch("layouts", h_textclass, "layout"));
+ textclass.Read(layoutfilename);
end_preamble(os, textclass);
return textclass;
}
Index: src/tex2lyx/tex2lyx.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.C,v
retrieving revision 1.50
diff -u -p -r1.50 tex2lyx.C
--- src/tex2lyx/tex2lyx.C 2003/09/26 14:27:20 1.50
+++ src/tex2lyx/tex2lyx.C 2003/10/04 15:58:00
@@ -21,12 +21,15 @@
#include "support/path_defines.h"
#include "support/os.h"
+#include <boost/function.hpp>
+
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
+#include <map>
using std::endl;
using std::cout;
@@ -34,6 +37,7 @@ using std::cerr;
using std::getline;
using std::ifstream;
+using std::ofstream;
using std::istringstream;
using std::ostringstream;
using std::stringstream;
@@ -120,22 +124,73 @@ string active_environment()
}
-int main(int argc, char * argv[])
+string documentclass;
+
+
+/// return the number of arguments consumed
+typedef boost::function<int(string const &, string const &)> cmd_helper;
+
+
+int parse_help(string const &, string const &)
{
- if (argc <= 1) {
- cerr << "Usage: " << argv[0] << " <infile.tex>" << endl;
- return 2;
+ cerr << "Usage: tex2lyx [ command line switches ] <infile.tex>\n"
+ "Command line switches (case sensitive):\n"
+ "\t-help summarize tex2lyx usage\n"
+ "\t-c textclass declare the textclass" << endl;
+ exit(0);
+}
+
+
+int parse_class(string const & arg, string const &)
+{
+ if (arg.empty()) {
+ cerr << "Missing textclass string after -c switch" << endl;
+ exit(1);
}
+ documentclass = arg;
+ return 1;
+}
- lyx::support::os::init(&argc, &argv);
- lyx::support::setLyxPaths();
- ifstream is(argv[1]);
+void easyParse(int & argc, char * argv[])
+{
+ std::map<string, cmd_helper> cmdmap;
+
+ cmdmap["-c"] = parse_class;
+ cmdmap["-help"] = parse_help;
+ cmdmap["--help"] = parse_help;
+
+ for (int i = 1; i < argc; ++i) {
+ std::map<string, cmd_helper>::const_iterator it
+ = cmdmap.find(argv[i]);
+
+ // don't complain if not found - may be parsed later
+ if (it == cmdmap.end())
+ continue;
+
+ string arg((i + 1 < argc) ? argv[i + 1] : "");
+ string arg2((i + 2 < argc) ? argv[i + 2] : "");
+
+ int const remove = 1 + it->second(arg, arg2);
+
+ // Now, remove used arguments by shifting
+ // the following ones remove places down.
+ argc -= remove;
+ for (int j = i; j < argc; ++j)
+ argv[j] = argv[j + remove];
+ --i;
+ }
+}
+
+
+void tex2lyx(std::istream &is, std::ostream &os)
+{
Parser p(is);
//p.dump();
stringstream ss;
- LyXTextClass textclass = parse_preamble(p, ss);
+ LyXTextClass textclass = parse_preamble(p, ss, documentclass);
+
active_environments.push_back("document");
Context context(true, textclass);
parse_text(p, ss, FLAG_END, true, context);
@@ -143,7 +198,35 @@ int main(int argc, char * argv[])
ss << "\n\\end_document\n";
ss.seekg(0);
- cout << ss.str();
+ os << ss.str();
+}
+
+
+void tex2lyx(string const &infilename, string const &outfilename)
+{
+ ifstream is(infilename.c_str());
+ ofstream os(outfilename.c_str());
+ cerr << "File: " << infilename << "\n";
+ tex2lyx(is, os);
+}
+
+
+int main(int argc, char * argv[])
+{
+ easyParse(argc, argv);
+
+ if (argc <= 1) {
+ cerr << "Usage: tex2lyx [ command line switches ] <infile.tex>\n"
+ "See tex2lyx -help." << endl;
+ return 2;
+ }
+
+ lyx::support::os::init(&argc, &argv);
+ lyx::support::setLyxPaths();
+
+ ifstream is(argv[1]);
+ tex2lyx(is, cout);
+
return 0;
}
Index: src/tex2lyx/tex2lyx.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.h,v
retrieving revision 1.10
diff -u -p -r1.10 tex2lyx.h
--- src/tex2lyx/tex2lyx.h 2003/09/09 18:27:24 1.10
+++ src/tex2lyx/tex2lyx.h 2003/10/04 15:58:00
@@ -23,7 +23,7 @@
class Context;
/// in preamble.C
-LyXTextClass const parse_preamble(Parser & p, std::ostream & os);
+LyXTextClass const parse_preamble(Parser & p, std::ostream & os, string const & forceclass);
/// in text.C
@@ -60,5 +60,13 @@ char const ** is_known(std::string const
// Access to environment stack
extern std::vector<std::string> active_environments;
std::string active_environment();
+
+/*! Reads tex input from \a is and writes lyx output to \a os.
+ * Uses some common settings for the preamble, so this should only
+ * be used more than once for included documents.
+ * Caution: Overwrites the existing preamble settings if the new document
+ * contains a preamble. */
+void tex2lyx(std::istream &is, std::ostream &os);
+void tex2lyx(string const &infilename, string const &outfilename);
#endif
Index: src/tex2lyx/texparser.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.C,v
retrieving revision 1.21
diff -u -p -r1.21 texparser.C
--- src/tex2lyx/texparser.C 2003/09/08 00:33:41 1.21
+++ src/tex2lyx/texparser.C 2003/10/04 15:58:01
@@ -119,6 +119,12 @@ string Token::asInput() const
}
+string Token::asInput2() const
+{
+ return char_ ? string(1, char_) : '\\' + cs_;
+}
+
+
//
// Parser
//
@@ -252,23 +258,23 @@ void Parser::tokenize(istream & is)
//do {
is.get(c);
//} while (catcode(c) == catNewline);
- push_back(Token("par"));
+ push_back(Token("par", lineno_));
} else {
- push_back(Token('\n', catNewline));
+ push_back(Token('\n', catNewline, lineno_));
}
is.putback(c);
break;
}
case catComment: {
- push_back(Token(c, catComment));
+ push_back(Token(c, catComment, lineno_));
while (is.get(c) && catcode(c) != catNewline)
- push_back(Token(c, catLetter));
- push_back(Token(c, catNewline));
+ push_back(Token(c, catLetter, lineno_));
+ push_back(Token(c, catNewline, lineno_));
++lineno_;
is.get(c);
if (catcode(c) == catNewline) {
- push_back(Token("par"));
+ push_back(Token("par", lineno_));
++lineno_;
} else {
is.putback(c);
@@ -286,16 +292,21 @@ void Parser::tokenize(istream & is)
// collect letters
while (is.get(c) && catcode(c) == catLetter)
s += c;
+ // Is this a starred form?
+ //if(c == '*') {
+ // s += c;
+ // is.get(c);
+ //}
skipSpaceTokens(is, c);
}
- push_back(Token(s));
+ push_back(Token(s, lineno_));
}
break;
}
case catSuper:
case catSub: {
- push_back(Token(c, catcode(c)));
+ push_back(Token(c, catcode(c), lineno_));
is.get(c);
skipSpaceTokens(is, c);
break;
@@ -308,7 +319,7 @@ void Parser::tokenize(istream & is)
}
default:
- push_back(Token(c, catcode(c)));
+ push_back(Token(c, catcode(c), lineno_));
}
}
}
@@ -328,7 +339,8 @@ void Parser::dump() const
void Parser::error(string const & msg)
{
- cerr << "Line ~" << lineno_ << ": parse error: " << msg << endl;
+ //cerr << "Line ~" << lineno_ << ": parse error: " << msg << endl;
+ cerr << "Line ~" << tokens_[pos_].lineno() << ": parse error: " << msg << endl;
dump();
//exit(1);
}
@@ -370,6 +382,12 @@ string Parser::verbatim_item()
return res;
}
return get_token().asInput();
+}
+
+
+void Parser::reset()
+{
+ pos_ = 0;
}
Index: src/tex2lyx/texparser.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.h,v
retrieving revision 1.14
diff -u -p -r1.14 texparser.h
--- src/tex2lyx/texparser.h 2003/08/23 00:17:00 1.14
+++ src/tex2lyx/texparser.h 2003/10/04 15:58:01
@@ -70,11 +70,11 @@ enum {
class Token {
public:
///
- Token() : cs_(), char_(0), cat_(catIgnore) {}
+ Token(int lineno = 0) : cs_(), char_(0), cat_(catIgnore), lineno_(lineno) {}
///
- Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
+ Token(char c, CatCode cat, int lineno = 0) : cs_(), char_(c), cat_(cat), lineno_(lineno) {}
///
- Token(std::string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
+ Token(std::string const & cs, int lineno = 0) : cs_(cs), char_(0), cat_(catIgnore), lineno_(lineno) {}
///
std::string const & cs() const { return cs_; }
@@ -86,6 +86,8 @@ public:
std::string asString() const;
///
std::string asInput() const;
+ std::string asInput2() const;
+ int lineno() const { return lineno_; }
private:
///
@@ -94,6 +96,7 @@ private:
char char_;
///
CatCode cat_;
+ int lineno_;
};
std::ostream & operator<<(std::ostream & os, Token const & t);
@@ -112,7 +115,7 @@ public:
Parser(std::string const & s);
///
- int lineno() const { return lineno_; }
+ //int lineno() const { return lineno_; }
///
void putback();
/// dump contents to screen
@@ -148,6 +151,9 @@ public:
std::string verbatim_item();
///
std::string verbatimOption();
+
+ /// resets the parser to initial state
+ void reset();
///
void setCatCode(char c, CatCode cat);
Index: src/tex2lyx/text.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/text.C,v
retrieving revision 1.23
diff -u -p -r1.23 text.C
--- src/tex2lyx/text.C 2003/09/09 18:27:24 1.23
+++ src/tex2lyx/text.C 2003/10/04 15:58:03
@@ -18,6 +18,7 @@
#include "FloatList.h"
#include "support/lstrings.h"
#include "support/tostr.h"
+#include "support/filetools.h"
#include <iostream>
#include <map>
@@ -106,18 +107,20 @@ map<string, string> split_map(string con
// understand. Not perfect, but rather best-effort.
string translate_len(string const & len)
{
- const string::size_type i = len.find_first_not_of(" -01234567890.");
+ const string::size_type i = len.find_first_not_of(" -0123456789.,");
+ //'4,5' is a valid LaTeX number. Change it to '4.5'
+ string const length = lyx::support::subst(len, ',', '.');
// a normal length
if (i == string::npos || len[i] != '\\')
- return len;
- istringstream iss(string(len, 0, i));
+ return length;
+ istringstream iss(string(length, 0, i));
double val;
iss >> val;
val = val*100;
ostringstream oss;
oss << val;
string const valstring = oss.str();
- const string::size_type i2 = len.find(" ", i);
+ const string::size_type i2 = length.find(" ", i);
string const unit = string(len, i, i2 - i);
string const endlen = (i2 == string::npos) ? string() : string(len, i2);
if (unit == "\\textwidth")
@@ -133,7 +136,7 @@ string translate_len(string const & len)
else if (unit == "\\textheight")
return valstring + "theight%" + endlen;
else
- return len;
+ return length;
}
@@ -239,6 +242,13 @@ void parse_environment(Parser & p, ostre
parse_math(p, os, FLAG_END, MATH_MODE);
os << "\\end{" << name << "}";
end_inset(os);
+ } else if (name == "displaymath") {
+ parent_context.check_layout(os);
+ begin_inset(os, "Formula ");
+ os << "\\[";
+ parse_math(p, os, FLAG_END, MATH_MODE);
+ os << "\\]";
+ end_inset(os);
}
else if (name == "tabular") {
@@ -265,7 +275,7 @@ void parse_environment(Parser & p, ostre
begin_inset(os, "Minipage\n");
string position = "1";
string inner_pos = "0";
- string height;
+ string height = "0pt";
if (p.next_token().asInput() == "[") {
switch(p.getArg('[', ']')[0]) {
case 't': position = "0"; break;
@@ -330,6 +340,16 @@ void parse_environment(Parser & p, ostre
context.check_end_deeper(os);
}
+ //else if (name == "appendix") {
+ // This is no good latex style, but it works and is used in some documents...
+ //parent_context.check_end_layout(os);
+ //os << "\n\\begin_layout " << parent_context.layout->name() << "\n";
+ //parent_context.need_end_layout = true;
+ //parent_context.need_layout=false;
+ //os << "\\start_of_appendix\n";
+ //parse_text(p, os, FLAG_END, outer, parent_context);
+ //}
+
else {
parent_context.check_layout(os);
handle_ert(os, "\\begin{" + name + "}", parent_context);
@@ -347,6 +367,8 @@ void parse_text(Parser & p, ostream & os
Context & context)
{
LyXLayout_ptr newlayout;
+ // Store the latest bibliographystyle (needed for bibtex inset)
+ string bibliographystyle;
while (p.good()) {
Token const & t = p.get_token();
@@ -552,6 +574,7 @@ void parse_text(Parser & p, ostream & os
}
else if (t.cs() == "def") {
+ context.check_layout(os);
string name = p.get_token().cs();
while (p.next_token().cat() != catBegin)
name += p.get_token().asString();
@@ -886,10 +909,14 @@ void parse_text(Parser & p, ostream & os
name += p.get_token().asInput();
context.check_layout(os);
begin_inset(os, "Include ");
- os << name << '{' << p.getArg('{', '}') << "}\n";
+ string filename(p.getArg('{', '}'));
+ string lyxname(lyx::support::ChangeExtension(filename, ".lyx"));
+ tex2lyx(filename, lyxname);
+ os << name << '{' << lyxname << "}\n";
os << "preview false\n";
end_inset(os);
}
+
else if (t.cs() == "fancyhead") {
context.check_layout(os);
ostringstream ss;
@@ -898,7 +925,28 @@ void parse_text(Parser & p, ostream & os
ss << '{' << p.verbatim_item() << "}\n";
handle_ert(os, ss.str(), context);
}
+
+ else if (t.cs() == "bibliographystyle") {
+ // store new bibliographystyle
+ bibliographystyle = p.verbatim_item();
+ // output new bibliographystyle.
+ // This is only necessary if used in some other macro than \bibliography.
+ context.check_layout(os);
+ handle_ert(os, "\\bibliographystyle{" + bibliographystyle + "}", context);
+ }
+ else if (t.cs() == "bibliography") {
+ context.check_layout(os);
+ begin_inset(os, "LatexCommand ");
+ os << "\\bibtex";
+ // Do we have a bibliographystyle set?
+ if(bibliographystyle.size()) {
+ os << '[' << bibliographystyle << ']';
+ }
+ os << '{' << p.verbatim_item() << "}\n";
+ end_inset(os);
+ }
+
else {
//cerr << "#: " << t << " mode: " << mode << endl;
// heuristic: read up to next non-nested space
@@ -914,7 +962,15 @@ void parse_text(Parser & p, ostream & os
handle_ert(os, s + ' ', context);
*/
context.check_layout(os);
- handle_ert(os, t.asInput() + ' ', context);
+ // Why two spaces after the token????
+ //handle_ert(os, t.asInput() + ' ', context);
+ if(p.next_token().cat() == catBegin) {
+ handle_ert(os, t.asInput2(), context);
+ //handle_ert(os, t.asInput2() + "{ ", context);
+ //p.get_token(); // Eat '{'
+ } else {
+ handle_ert(os, t.asInput(), context);
+ }
}
if (flags & FLAG_LEAVE) {