I am in the process of converting a bigger document with tex2lyx. In order
to make it work, I had to do the following changes (see attached patch):

- add \text{} and \fbox{} support to math.C
- allow parsing of partial documents (for inclusion with \input)
- steal commandline switch -c from reLyX to support the previous point
- add support for lengths with comma (4,5cm instead of 4.5cm) to minipage 
environments
- add support for some math environments like displaymath
- add support for \bibliography command

The document translates already quite well, but a remaining problem is that
unknown latex commands "\foo" get translated to "\foo " or even "\foo  ".
This also breaks commands like \hspace*{}. This happens because
Token::asInput() adds an extra space, and parse_text() does this also for
unknown tokens.

Why is the space added in Token::asInput()? Should something else (what?) be 
used instead where the space hurts?

The patch is work in progress, partly unfinished and contains some ugly
things like Token::lineno_. I'll send a polished version when everything
works.


Georg
Index: src/tex2lyx/math.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/math.C,v
retrieving revision 1.10
diff -u -p -r1.10 math.C
--- src/tex2lyx/math.C	2003/09/09 18:27:24	1.10
+++ src/tex2lyx/math.C	2003/10/04 15:57:59
@@ -27,7 +27,10 @@ bool is_math_env(string const & name)
 {
 	static char const * known_math_envs[] = { "equation", "equation*",
 	"eqnarray", "eqnarray*", "align", "align*", "gather", "gather*",
-	"multline", "multline*", 0};
+	"multline", "multline*", "math", "displaymath", "flalign", "flalign*",
+	// These require extra args
+	// "alignat", "alignat*", "xalignat", "xalignat*", "xxalignat",
+	0};
 
 	for (char const ** what = known_math_envs; *what; ++what)
 		if (*what == name)
@@ -190,14 +193,15 @@ void parse_math(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "textrm" || t.cs() == "textsf" || t.cs() == "textbf"
-				|| t.cs() == "texttt" || t.cs() == "textsc") {
+				|| t.cs() == "texttt" || t.cs() == "textsc"
+				|| t.cs() == "text" || t.cs() == "intertext") {
 			os << '\\' << t.cs() << '{';
 			parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE);
 			os << '}';
 		}
 
-		else if (t.cs() == "mbox") {
-			os << "\\mbox{";
+		else if (t.cs() == "mbox" || t.cs() == "fbox") {
+			os << "\\" << t.cs() << '{';
 			parse_math(p, os, FLAG_ITEM, MATHTEXT_MODE);
 			os << '}';
 		}
Index: src/tex2lyx/preamble.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/preamble.C,v
retrieving revision 1.15
diff -u -p -r1.15 preamble.C
--- src/tex2lyx/preamble.C	2003/09/08 00:33:41	1.15
+++ src/tex2lyx/preamble.C	2003/10/04 15:58:00
@@ -31,6 +31,8 @@ using std::ostream;
 using std::ostringstream;
 using std::string;
 using std::vector;
+using std::cerr;
+using std::endl;
 
 using lyx::support::LibFileSearch;
 
@@ -173,14 +175,47 @@ void end_preamble(ostream & os, LyXTextC
 
 } // anonymous namespace
 
-LyXTextClass const parse_preamble(Parser & p, ostream & os)
+LyXTextClass const parse_preamble(Parser & p, ostream & os, string const & forceclass)
 {
 	// initialize fixed types
 	special_columns['D'] = 3;
+	bool is_full_document = false;
 
+	// determine whether this is a full document or a fragment for inclusion
 	while (p.good()) {
 		Token const & t = p.get_token();
 
+		//
+		// cat codes
+		//
+		if (t.cat() == catLetter ||
+			  t.cat() == catSpace ||
+			  t.cat() == catSuper ||
+			  t.cat() == catSub ||
+			  t.cat() == catOther ||
+			  t.cat() == catMath ||
+			  t.cat() == catActive ||
+			  t.cat() == catBegin ||
+			  t.cat() == catEnd ||
+			  t.cat() == catAlign ||
+			  t.cat() == catNewline ||
+			  t.cat() == catParameter)
+			;
+
+		else if (t.cat() == catComment)
+			handle_comment(p);
+
+		else if (t.cs() == "documentclass") {
+			is_full_document = true;
+			break;
+		}
+
+	}
+        p.reset();
+
+	while (is_full_document && p.good()) {
+		Token const & t = p.get_token();
+
 #ifdef FILEDEBUG
 		cerr << "t: " << t << " flags: " << flags << "\n";
 		//cell->dump();
@@ -351,8 +386,17 @@ LyXTextClass const parse_preamble(Parser
 			h_preamble << '\\' << t.cs() << ' ';
 	}
 
+	// Force textclass if the user wanted it
+	if(forceclass.size()) {
+		h_textclass = forceclass;
+	}
+	string layoutfilename = LibFileSearch("layouts", h_textclass, "layout");
+	if(!layoutfilename.size()) {
+		cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." << endl;
+		exit(1);
+	}
 	LyXTextClass textclass;
-	textclass.Read(LibFileSearch("layouts", h_textclass, "layout"));
+	textclass.Read(layoutfilename);
 	end_preamble(os, textclass);
 	return textclass;
 }
Index: src/tex2lyx/tex2lyx.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.C,v
retrieving revision 1.50
diff -u -p -r1.50 tex2lyx.C
--- src/tex2lyx/tex2lyx.C	2003/09/26 14:27:20	1.50
+++ src/tex2lyx/tex2lyx.C	2003/10/04 15:58:00
@@ -21,12 +21,15 @@
 #include "support/path_defines.h"
 #include "support/os.h"
 
+#include <boost/function.hpp>
+
 #include <cctype>
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <sstream>
 #include <vector>
+#include <map>
 
 using std::endl;
 using std::cout;
@@ -34,6 +37,7 @@ using std::cerr;
 using std::getline;
 
 using std::ifstream;
+using std::ofstream;
 using std::istringstream;
 using std::ostringstream;
 using std::stringstream;
@@ -120,22 +124,73 @@ string active_environment()
 }
 
 
-int main(int argc, char * argv[])
+string documentclass;
+
+
+/// return the number of arguments consumed
+typedef boost::function<int(string const &, string const &)> cmd_helper;
+
+
+int parse_help(string const &, string const &)
 {
-	if (argc <= 1) {
-		cerr << "Usage: " << argv[0] << " <infile.tex>" << endl;
-		return 2;
+	cerr << "Usage: tex2lyx [ command line switches ] <infile.tex>\n"
+		"Command line switches (case sensitive):\n"
+		"\t-help              summarize tex2lyx usage\n"
+		"\t-c textclass       declare the textclass" << endl;
+	exit(0);
+}
+
+
+int parse_class(string const & arg, string const &)
+{
+	if (arg.empty()) {
+		cerr << "Missing textclass string after -c switch" << endl;
+		exit(1);
 	}
+	documentclass = arg;
+	return 1;
+}
 
-	lyx::support::os::init(&argc, &argv);
-	lyx::support::setLyxPaths();
 
-	ifstream is(argv[1]);
+void easyParse(int & argc, char * argv[])
+{
+	std::map<string, cmd_helper> cmdmap;
+
+	cmdmap["-c"] = parse_class;
+	cmdmap["-help"] = parse_help;
+	cmdmap["--help"] = parse_help;
+
+	for (int i = 1; i < argc; ++i) {
+		std::map<string, cmd_helper>::const_iterator it
+			= cmdmap.find(argv[i]);
+
+		// don't complain if not found - may be parsed later
+		if (it == cmdmap.end())
+			continue;
+
+		string arg((i + 1 < argc) ? argv[i + 1] : "");
+		string arg2((i + 2 < argc) ? argv[i + 2] : "");
+
+		int const remove = 1 + it->second(arg, arg2);
+
+		// Now, remove used arguments by shifting
+		// the following ones remove places down.
+		argc -= remove;
+		for (int j = i; j < argc; ++j)
+			argv[j] = argv[j + remove];
+		--i;
+	}
+}
+
+
+void tex2lyx(std::istream &is, std::ostream &os)
+{
 	Parser p(is);
 	//p.dump();
 
 	stringstream ss;
-	LyXTextClass textclass = parse_preamble(p, ss);
+	LyXTextClass textclass = parse_preamble(p, ss, documentclass);
+
 	active_environments.push_back("document");
 	Context context(true, textclass);
 	parse_text(p, ss, FLAG_END, true, context);
@@ -143,7 +198,35 @@ int main(int argc, char * argv[])
 	ss << "\n\\end_document\n";
 
 	ss.seekg(0);
-	cout << ss.str();
+	os << ss.str();
+}
+
+
+void tex2lyx(string const &infilename, string const &outfilename)
+{
+	ifstream is(infilename.c_str());
+	ofstream os(outfilename.c_str());
+	cerr << "File: " << infilename << "\n";
+	tex2lyx(is, os);
+}
+
+
+int main(int argc, char * argv[])
+{
+	easyParse(argc, argv);
+
+	if (argc <= 1) {
+		cerr << "Usage: tex2lyx [ command line switches ] <infile.tex>\n"
+			"See tex2lyx -help." << endl;
+		return 2;
+	}
+
+	lyx::support::os::init(&argc, &argv);
+	lyx::support::setLyxPaths();
+
+	ifstream is(argv[1]);
+	tex2lyx(is, cout);
+
 	return 0;
 }
 
Index: src/tex2lyx/tex2lyx.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.h,v
retrieving revision 1.10
diff -u -p -r1.10 tex2lyx.h
--- src/tex2lyx/tex2lyx.h	2003/09/09 18:27:24	1.10
+++ src/tex2lyx/tex2lyx.h	2003/10/04 15:58:00
@@ -23,7 +23,7 @@
 class Context;
 
 /// in preamble.C
-LyXTextClass const parse_preamble(Parser & p, std::ostream & os);
+LyXTextClass const parse_preamble(Parser & p, std::ostream & os, string const & forceclass);
 
 
 /// in text.C
@@ -60,5 +60,13 @@ char const ** is_known(std::string const
 // Access to environment stack
 extern std::vector<std::string> active_environments;
 std::string active_environment();
+
+/*! Reads tex input from \a is and writes lyx output to \a os.
+ *  Uses some common settings for the preamble, so this should only
+ *  be used more than once for included documents.
+ *  Caution: Overwrites the existing preamble settings if the new document
+ *  contains a preamble. */
+void tex2lyx(std::istream &is, std::ostream &os);
+void tex2lyx(string const &infilename, string const &outfilename);
 
 #endif
Index: src/tex2lyx/texparser.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.C,v
retrieving revision 1.21
diff -u -p -r1.21 texparser.C
--- src/tex2lyx/texparser.C	2003/09/08 00:33:41	1.21
+++ src/tex2lyx/texparser.C	2003/10/04 15:58:01
@@ -119,6 +119,12 @@ string Token::asInput() const
 }
 
 
+string Token::asInput2() const
+{
+	return char_ ? string(1, char_) : '\\' + cs_;
+}
+
+
 //
 // Parser
 //
@@ -252,23 +258,23 @@ void Parser::tokenize(istream & is)
 					//do {
 						is.get(c);
 					//} while (catcode(c) == catNewline);
-					push_back(Token("par"));
+					push_back(Token("par", lineno_));
 				} else {
-					push_back(Token('\n', catNewline));
+					push_back(Token('\n', catNewline, lineno_));
 				}
 				is.putback(c);
 				break;
 			}
 
 			case catComment: {
-				push_back(Token(c, catComment));
+				push_back(Token(c, catComment, lineno_));
 				while (is.get(c) && catcode(c) != catNewline)
-					push_back(Token(c, catLetter));
-				push_back(Token(c, catNewline));
+					push_back(Token(c, catLetter, lineno_));
+				push_back(Token(c, catNewline, lineno_));
 				++lineno_;
 				is.get(c);
 				if (catcode(c) == catNewline) {
-					push_back(Token("par"));
+					push_back(Token("par", lineno_));
 					++lineno_;
 				} else {
 					is.putback(c);
@@ -286,16 +292,21 @@ void Parser::tokenize(istream & is)
 						// collect letters
 						while (is.get(c) && catcode(c) == catLetter)
 							s += c;
+						// Is this a starred form?
+						//if(c == '*') {
+						//	s += c;
+						//	is.get(c);
+						//}
 						skipSpaceTokens(is, c);
 					}
-					push_back(Token(s));
+					push_back(Token(s, lineno_));
 				}
 				break;
 			}
 
 			case catSuper:
 			case catSub: {
-				push_back(Token(c, catcode(c)));
+				push_back(Token(c, catcode(c), lineno_));
 				is.get(c);
 				skipSpaceTokens(is, c);
 				break;
@@ -308,7 +319,7 @@ void Parser::tokenize(istream & is)
 			}
 
 			default:
-				push_back(Token(c, catcode(c)));
+				push_back(Token(c, catcode(c), lineno_));
 		}
 	}
 }
@@ -328,7 +339,8 @@ void Parser::dump() const
 
 void Parser::error(string const & msg)
 {
-	cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
+	//cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
+	cerr << "Line ~" << tokens_[pos_].lineno() << ":  parse error: " << msg << endl;
 	dump();
 	//exit(1);
 }
@@ -370,6 +382,12 @@ string Parser::verbatim_item()
 		return res;
 	}
 	return get_token().asInput();
+}
+
+
+void Parser::reset()
+{
+        pos_ = 0;
 }
 
 
Index: src/tex2lyx/texparser.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.h,v
retrieving revision 1.14
diff -u -p -r1.14 texparser.h
--- src/tex2lyx/texparser.h	2003/08/23 00:17:00	1.14
+++ src/tex2lyx/texparser.h	2003/10/04 15:58:01
@@ -70,11 +70,11 @@ enum {
 class Token {
 public:
 	///
-	Token() : cs_(), char_(0), cat_(catIgnore) {}
+	Token(int lineno = 0) : cs_(), char_(0), cat_(catIgnore), lineno_(lineno) {}
 	///
-	Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
+	Token(char c, CatCode cat, int lineno = 0) : cs_(), char_(c), cat_(cat), lineno_(lineno) {}
 	///
-	Token(std::string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
+	Token(std::string const & cs, int lineno = 0) : cs_(cs), char_(0), cat_(catIgnore), lineno_(lineno) {}
 
 	///
 	std::string const & cs() const { return cs_; }
@@ -86,6 +86,8 @@ public:
 	std::string asString() const;
 	///
 	std::string asInput() const;
+	std::string asInput2() const;
+	int lineno() const { return lineno_; }
 
 private:
 	///
@@ -94,6 +96,7 @@ private:
 	char char_;
 	///
 	CatCode cat_;
+	int lineno_;
 };
 
 std::ostream & operator<<(std::ostream & os, Token const & t);
@@ -112,7 +115,7 @@ public:
 	Parser(std::string const & s);
 
 	///
-	int lineno() const { return lineno_; }
+	//int lineno() const { return lineno_; }
 	///
 	void putback();
 	/// dump contents to screen
@@ -148,6 +151,9 @@ public:
 	std::string verbatim_item();
 	///
 	std::string verbatimOption();
+
+	/// resets the parser to initial state
+	void reset();
 
 	///
 	void setCatCode(char c, CatCode cat);
Index: src/tex2lyx/text.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/text.C,v
retrieving revision 1.23
diff -u -p -r1.23 text.C
--- src/tex2lyx/text.C	2003/09/09 18:27:24	1.23
+++ src/tex2lyx/text.C	2003/10/04 15:58:03
@@ -18,6 +18,7 @@
 #include "FloatList.h"
 #include "support/lstrings.h"
 #include "support/tostr.h"
+#include "support/filetools.h"
 
 #include <iostream>
 #include <map>
@@ -106,18 +107,20 @@ map<string, string> split_map(string con
 // understand. Not perfect, but rather best-effort.
 string translate_len(string const & len)
 {
-	const string::size_type i = len.find_first_not_of(" -01234567890.");
+	const string::size_type i = len.find_first_not_of(" -0123456789.,");
+	//'4,5' is a valid LaTeX number. Change it to '4.5'
+	string const length = lyx::support::subst(len, ',', '.');
 	// a normal length
 	if (i == string::npos || len[i]  != '\\')
-		return len;
-	istringstream iss(string(len, 0, i));
+		return length;
+	istringstream iss(string(length, 0, i));
 	double val;
 	iss >> val;
 	val = val*100;
 	ostringstream oss;
 	oss << val;
 	string const valstring = oss.str();
-	const string::size_type i2 = len.find(" ", i);
+	const string::size_type i2 = length.find(" ", i);
 	string const unit = string(len, i, i2 - i);
 	string const endlen = (i2 == string::npos) ? string() : string(len, i2);
 	if (unit == "\\textwidth")
@@ -133,7 +136,7 @@ string translate_len(string const & len)
 	else if (unit == "\\textheight")
 		return valstring + "theight%" + endlen;
 	else
-		return len;
+		return length;
 }
 
 
@@ -239,6 +242,13 @@ void parse_environment(Parser & p, ostre
 		parse_math(p, os, FLAG_END, MATH_MODE);
 		os << "\\end{" << name << "}";
 		end_inset(os);
+	} else if (name == "displaymath") {
+		parent_context.check_layout(os);
+		begin_inset(os, "Formula ");
+		os << "\\[";
+		parse_math(p, os, FLAG_END, MATH_MODE);
+		os << "\\]";
+		end_inset(os);
 	}
 
 	else if (name == "tabular") {
@@ -265,7 +275,7 @@ void parse_environment(Parser & p, ostre
 		begin_inset(os, "Minipage\n");
 		string position = "1";
 		string inner_pos = "0";
-		string height;
+		string height = "0pt";
 		if (p.next_token().asInput() == "[") {
 			switch(p.getArg('[', ']')[0]) {
 			case 't': position = "0"; break;
@@ -330,6 +340,16 @@ void parse_environment(Parser & p, ostre
 		context.check_end_deeper(os);
 	}
 
+	//else if (name == "appendix") {
+		// This is no good latex style, but it works and is used in some documents...
+		//parent_context.check_end_layout(os);
+		//os << "\n\\begin_layout " << parent_context.layout->name() << "\n";
+		//parent_context.need_end_layout = true;
+		//parent_context.need_layout=false;
+		//os << "\\start_of_appendix\n";
+		//parse_text(p, os, FLAG_END, outer, parent_context);
+	//}
+
 	else {
 		parent_context.check_layout(os);
 		handle_ert(os, "\\begin{" + name + "}", parent_context);
@@ -347,6 +367,8 @@ void parse_text(Parser & p, ostream & os
 		Context & context)
 {
 	LyXLayout_ptr newlayout;
+	// Store the latest bibliographystyle (needed for bibtex inset)
+	string bibliographystyle;
 	while (p.good()) {
 		Token const & t = p.get_token();
 
@@ -552,6 +574,7 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "def") {
+			context.check_layout(os);
 			string name = p.get_token().cs();
 			while (p.next_token().cat() != catBegin)
 				name += p.get_token().asString();
@@ -886,10 +909,14 @@ void parse_text(Parser & p, ostream & os
 				name += p.get_token().asInput();
 			context.check_layout(os);
 			begin_inset(os, "Include ");
-			os << name << '{' << p.getArg('{', '}') << "}\n";
+			string filename(p.getArg('{', '}'));
+			string lyxname(lyx::support::ChangeExtension(filename, ".lyx"));
+			tex2lyx(filename, lyxname);
+			os << name << '{' << lyxname << "}\n";
 			os << "preview false\n";
 			end_inset(os);
 		}
+
 		else if (t.cs() == "fancyhead") {
 			context.check_layout(os);
 			ostringstream ss;
@@ -898,7 +925,28 @@ void parse_text(Parser & p, ostream & os
 			ss << '{' << p.verbatim_item() << "}\n";
 			handle_ert(os, ss.str(), context);
 		}
+                
+		else if (t.cs() == "bibliographystyle") {
+			// store new bibliographystyle
+			bibliographystyle = p.verbatim_item();
+			// output new bibliographystyle.
+			// This is only necessary if used in some other macro than \bibliography.
+			context.check_layout(os);
+			handle_ert(os, "\\bibliographystyle{" + bibliographystyle + "}", context);
+		}
 
+		else if (t.cs() == "bibliography") {
+			context.check_layout(os);
+			begin_inset(os, "LatexCommand ");
+			os << "\\bibtex";
+			// Do we have a bibliographystyle set?
+			if(bibliographystyle.size()) {
+				os << '[' << bibliographystyle << ']';
+			}
+			os << '{' << p.verbatim_item() << "}\n";
+			end_inset(os);
+		}
+
 		else {
 			//cerr << "#: " << t << " mode: " << mode << endl;
 			// heuristic: read up to next non-nested space
@@ -914,7 +962,15 @@ void parse_text(Parser & p, ostream & os
 			handle_ert(os, s + ' ', context);
 			*/
 			context.check_layout(os);
-			handle_ert(os, t.asInput() + ' ', context);
+			// Why two spaces after the token????
+			//handle_ert(os, t.asInput() + ' ', context);
+			if(p.next_token().cat() == catBegin) {
+				handle_ert(os, t.asInput2(), context);
+				//handle_ert(os, t.asInput2() + "{ ", context);
+				//p.get_token();				// Eat '{'
+			} else {
+				handle_ert(os, t.asInput(), context);
+			}
 		}
 
 		if (flags & FLAG_LEAVE) {

Reply via email to