Re: [patch] tex2lyx whitespace changes

Georg Baum Tue, 04 Nov 2003 12:57:22 -0800

Am Montag, 3. November 2003 14:41 schrieb Andre Poenitz:

> Ok. I don't have the time to check this thoroughly. It looks sensible,
> so if you can confirm that the round-trip on the UserGuide is no worse
> than before, I'd commit this.


Please take the attached version. It has exactly the same nesting problems 
as without the patch, the rest is better. It includes the whitespace 
changes as discussed.


Georg

Index: src/tex2lyx/ChangeLog
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/ChangeLog,v
retrieving revision 1.40
diff -u -p -r1.40 ChangeLog
--- src/tex2lyx/ChangeLog	2003/10/23 11:46:33	1.40
+++ src/tex2lyx/ChangeLog	2003/11/04 17:24:58
@@ -1,3 +1,27 @@
+2003-11-03  Georg Baum  <[EMAIL PROTECTED]>
+
+	* math.C:
+	* table.C:
+	* text.C:
+	* context.[Ch]: New functions Context::set_item(),
+	Context::new_paragraph(ostream & os) and Context::atParagraphStart()
+	to make Context usage more explicit
+	* texparser.[Ch]: Rework Parser::tokenize (see comment in texparser.h)
+	* table.C:
+	* math.C:
+	* texparser.C: Don't silently drop comments
+	* texparser.C: Token::asInput() does not append a space anymore
+	* texparser.[Ch]: Renamed Parser::prev_token() to Parser::curr_token().
+	The new Parser::prev_token() now really returns the previous token
+	* Context.[Ch]:
+	* text.C: Convert known vspaces at paragraph start to \\added_space_top
+	* preamble.C: Don't put out newlines twice.
+	* text.C: Fix minipage position bug
+	* text.C: Fix \labelwidthstring bug
+	* text.C: Recognize alignment environments
+	* text.C: Fix a few cases of incorrect context usage, resulting
+	in missing or superfluous \begin_layout / \end_layout lines.
+
 2003-10-23  Georg Baum  <[EMAIL PROTECTED]>
 
 	* math.C:
Index: src/tex2lyx/context.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/context.C,v
retrieving revision 1.7
diff -u -p -r1.7 context.C
--- src/tex2lyx/context.C	2003/10/06 15:43:21	1.7
+++ src/tex2lyx/context.C	2003/11/04 17:24:59
@@ -23,7 +23,7 @@ namespace {
 
 void begin_layout(ostream & os, LyXLayout_ptr layout)
 {
-	os << "\n\\begin_layout " << layout->name() << "\n\n";
+	os << "\n\\begin_layout " << layout->name() << "\n";
 }
 
 
@@ -94,11 +94,12 @@ void Context::check_layout(ostream & os)
 			begin_layout(os, layout);
 			need_layout=false;
 			need_end_layout = true;
-			if (!extra_stuff.empty()) {
-				os << extra_stuff;
-				extra_stuff.erase();
-			}
 		}
+		if (!extra_stuff.empty()) {
+			os << extra_stuff;
+			extra_stuff.erase();
+		}
+		os << "\n";
 	}
 }
 
@@ -140,6 +141,20 @@ void Context::check_end_deeper(ostream &
 }
 
 
+void Context::set_item()
+{
+	need_layout = true;
+	has_item = true;
+}
+
+
+void Context::new_paragraph(ostream & os)
+{
+	check_end_layout(os);
+	need_layout = true;
+}
+
+
 void Context::dump(ostream & os, string const & desc) const
 {
 	os << "\n" << desc <<" [";
@@ -147,6 +162,12 @@ void Context::dump(ostream & os, string 
 		os << "need_layout ";
 	if (need_end_layout)
 		os << "need_end_layout ";
+	if (need_end_deeper)
+		os << "need_end_deeper ";
+	if (has_item)
+		os << "has_item ";
+	if (deeper_paragraph)
+		os << "deeper_paragraph ";
 	if (!extra_stuff.empty())
 		os << "extrastuff=[" << extra_stuff << "] ";
 	os << "layout=" << layout->name();
Index: src/tex2lyx/context.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/context.h,v
retrieving revision 1.7
diff -u -p -r1.7 context.h
--- src/tex2lyx/context.h	2003/09/09 18:27:24	1.7
+++ src/tex2lyx/context.h	2003/11/04 17:24:59
@@ -37,6 +37,15 @@ struct Context {
 	// description \c desc.
 	void dump(std::ostream &, std::string const & desc = "context") const;
 
+	/// Are we just beginning a new paragraph?
+	bool atParagraphStart() const { return need_layout; }
+
+	/// Begin an item in a list environment
+	void set_item();
+
+	/// Start a new paragraph
+	void new_paragraph(std::ostream & os);
+
 	// Do we need to output some \begin_layout command before the
 	// next characters?
 	bool need_layout;
Index: src/tex2lyx/math.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/math.C,v
retrieving revision 1.11
diff -u -p -r1.11 math.C
--- src/tex2lyx/math.C	2003/10/23 11:46:33	1.11
+++ src/tex2lyx/math.C	2003/11/04 17:24:59
@@ -100,7 +100,6 @@ void parse_math(Parser & p, ostream & os
 		}
 
 		else if (t.cat() == catLetter ||
-			       t.cat() == catSpace ||
 			       t.cat() == catSuper ||
 			       t.cat() == catSub ||
 			       t.cat() == catOther ||
@@ -109,15 +108,6 @@ void parse_math(Parser & p, ostream & os
 			       t.cat() == catParameter)
 			os << t.character();
 
-		else if (t.cat() == catNewline) {
-			//if (p.next_token().cat() == catNewline) {
-			//	p.get_token();
-			//	handle_par(os);
-			//} else {
-				os << "\n "; // note the space
-			//}
-		}
-
 		else if (t.cat() == catBegin) {
 			os << '{';
 			parse_math(p, os, FLAG_BRACE_LAST, mode);
@@ -130,8 +120,13 @@ void parse_math(Parser & p, ostream & os
 			os << "unexpected '}' in math\n";
 		}
 
-		else if (t.cat() == catComment)
-			handle_comment(p);
+		else if (t.cat() == catComment) {
+			if (t.cs().size())
+				cerr << "Ignoring comment: " << t.asInput();
+			else
+				// "%\n" combination
+				p.skip_spaces();
+		}
 
 		//
 		// control sequences
Index: src/tex2lyx/preamble.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/preamble.C,v
retrieving revision 1.16
diff -u -p -r1.16 preamble.C
--- src/tex2lyx/preamble.C	2003/10/23 11:46:33	1.16
+++ src/tex2lyx/preamble.C	2003/11/04 17:25:00
@@ -188,7 +188,7 @@ LyXTextClass const parse_preamble(Parser
 	while (p.good()) {
 		Token const & t = p.get_token();
 
-		if (t.cs() == "documentclass") {
+		if (t.cat() == catEscape && t.cs() == "documentclass") {
 			is_full_document = true;
 			break;
 		}
@@ -206,7 +206,6 @@ LyXTextClass const parse_preamble(Parser
 		// cat codes
 		//
 		if (t.cat() == catLetter ||
-			  t.cat() == catSpace ||
 			  t.cat() == catSuper ||
 			  t.cat() == catSub ||
 			  t.cat() == catOther ||
@@ -215,24 +214,26 @@ LyXTextClass const parse_preamble(Parser
 			  t.cat() == catBegin ||
 			  t.cat() == catEnd ||
 			  t.cat() == catAlign ||
-			  t.cat() == catNewline ||
 			  t.cat() == catParameter)
 		h_preamble << t.character();
 
+		else if (t.cat() == catSpace || t.cat() == catNewline)
+			h_preamble << t.asInput();
+
 		else if (t.cat() == catComment)
-			handle_comment(p);
+			h_preamble << t.asInput();
 
 		else if (t.cs() == "pagestyle")
 			h_paperpagestyle = p.verbatim_item();
 
 		else if (t.cs() == "makeatletter") {
 			p.setCatCode('@', catLetter);
-			h_preamble << "\\makeatletter\n";
+			h_preamble << "\\makeatletter";
 		}
 
 		else if (t.cs() == "makeatother") {
 			p.setCatCode('@', catOther);
-			h_preamble << "\\makeatother\n";
+			h_preamble << "\\makeatother";
 		}
 
 		else if (t.cs() == "newcommand" || t.cs() == "renewcommand"
@@ -246,24 +247,24 @@ LyXTextClass const parse_preamble(Parser
 			string const opts = p.getOpt();
 			string const body = p.verbatim_item();
 			// only non-lyxspecific stuff
-			if (name != "\\noun "
-				  && name != "\\tabularnewline "
-			    && name != "\\LyX "
-				  && name != "\\lyxline "
-				  && name != "\\lyxaddress "
-				  && name != "\\lyxrightaddress "
-				  && name != "\\boldsymbol "
-				  && name != "\\lyxarrow ") {
+			if (   name != "\\noun"
+			    && name != "\\tabularnewline"
+			    && name != "\\LyX"
+			    && name != "\\lyxline"
+			    && name != "\\lyxaddress"
+			    && name != "\\lyxrightaddress"
+			    && name != "\\boldsymbol"
+			    && name != "\\lyxarrow") {
 				ostringstream ss;
 				ss << '\\' << t.cs();
 				if (star)
 					ss << '*';
-				ss << '{' << name << '}' << opts << '{' << body << "}\n";
+				ss << '{' << name << '}' << opts << '{' << body << "}";
 				h_preamble << ss.str();
 /*
 				ostream & out = in_preamble ? h_preamble : os;
 				out << "\\" << t.cs() << "{" << name << "}"
-				    << opts << "{" << body << "}\n";
+				    << opts << "{" << body << "}";
 */
 			}
 		}
@@ -301,7 +302,6 @@ LyXTextClass const parse_preamble(Parser
 			ss << p.getOpt();
 			ss << '{' << p.verbatim_item() << '}';
 			ss << '{' << p.verbatim_item() << '}';
-			ss << '\n';
 			if (name != "lyxcode" && name != "lyxlist"
 					&& name != "lyxrightadress" && name != "lyxaddress")
 				h_preamble << ss.str();
@@ -311,7 +311,7 @@ LyXTextClass const parse_preamble(Parser
 			string name = p.get_token().cs();
 			while (p.next_token().cat() != catBegin)
 				name += p.get_token().asString();
-			h_preamble << "\\def\\" << name << '{' << p.verbatim_item() << "}\n";
+			h_preamble << "\\def\\" << name << '{' << p.verbatim_item() << "}";
 		}
 
 		else if (t.cs() == "newcolumntype") {
@@ -328,7 +328,7 @@ LyXTextClass const parse_preamble(Parser
 			h_preamble << "\\newcolumntype{" << name << "}";
 			if (nargs)
 				h_preamble << "[" << nargs << "]";
-			h_preamble << "{" << p.verbatim_item() << "}\n";
+			h_preamble << "{" << p.verbatim_item() << "}";
 		}
 
 		else if (t.cs() == "setcounter") {
@@ -339,23 +339,21 @@ LyXTextClass const parse_preamble(Parser
 			else if (name == "tocdepth")
 				h_tocdepth = content;
 			else
-				h_preamble << "\\setcounter{" << name << "}{" << content << "}\n";
+				h_preamble << "\\setcounter{" << name << "}{" << content << "}";
 		}
 
 		else if (t.cs() == "setlength") {
 			string const name = p.verbatim_item();
 			string const content = p.verbatim_item();
+			// Is this correct?
 			if (name == "parskip")
 				h_paragraph_separation = "skip";
 			else if (name == "parindent")
 				h_paragraph_separation = "skip";
 			else
-				h_preamble << "\\setlength{" << name << "}{" << content << "}\n";
+				h_preamble << "\\setlength{" << name << "}{" << content << "}";
 		}
 
-		else if (t.cs() == "par")
-			h_preamble << '\n';
-
 		else if (t.cs() == "begin") {
 			string const name = p.getArg('{', '}');
 			if (name == "document")
@@ -364,8 +362,9 @@ LyXTextClass const parse_preamble(Parser
 		}
 
 		else if (t.cs().size())
-			h_preamble << '\\' << t.cs() << ' ';
+			h_preamble << '\\' << t.cs();
 	}
+	p.skip_spaces();
 
 	// Force textclass if the user wanted it
 	if (forceclass.size()) {
Index: src/tex2lyx/table.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/table.C,v
retrieving revision 1.22
diff -u -p -r1.22 table.C
--- src/tex2lyx/table.C	2003/10/29 19:19:27	1.22
+++ src/tex2lyx/table.C	2003/11/04 17:25:02
@@ -192,13 +192,14 @@ void parse_table(Parser & p, ostream & o
 			}
 		}
 
+		else if (t.cat() == catSpace || t.cat() == catNewline)
+				os << t.cs();
+
 		else if (t.cat() == catLetter ||
-			       t.cat() == catSpace ||
 			       t.cat() == catSuper ||
 			       t.cat() == catSub ||
 			       t.cat() == catOther ||
 			       t.cat() == catActive ||
-			       t.cat() == catNewline ||
 			       t.cat() == catParameter)
 			os << t.character();
 
@@ -216,6 +217,7 @@ void parse_table(Parser & p, ostream & o
 
 		else if (t.cat() == catAlign) {
 			os << TAB;
+			p.skip_spaces();
 		}
 
 		else if (t.cs() == "tabularnewline" || t.cs() == "\\") {
@@ -232,7 +234,7 @@ void parse_table(Parser & p, ostream & o
 			hlines += "\\cline{" + p.verbatim_item() + '}';
 
 		else if (t.cat() == catComment)
-			handle_comment(p);
+			os << t.asInput();
 
 		else if (t.cs() == "(") {
 			os << "\\(";
Index: src/tex2lyx/tex2lyx.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.C,v
retrieving revision 1.52
diff -u -p -r1.52 tex2lyx.C
--- src/tex2lyx/tex2lyx.C	2003/10/23 11:46:33	1.52
+++ src/tex2lyx/tex2lyx.C	2003/11/04 17:25:02
@@ -52,19 +52,6 @@ using lyx::support::IsFileWriteable;
 // Hacks to allow the thing to link in the lyxlayout stuff
 LyXErr lyxerr(std::cerr.rdbuf());
 
-void handle_comment(Parser & p)
-{
-	string s;
-	while (p.good()) {
-		Token const & t = p.get_token();
-		if (t.cat() == catNewline)
-			break;
-		s += t.asString();
-	}
-	//cerr << "comment: " << s << "\n";
-	p.skip_spaces();
-}
-
 
 string const trim(string const & a, char const * p)
 {
@@ -238,6 +225,13 @@ void tex2lyx(std::istream &is, std::ostr
 	active_environments.pop_back();
 	ss.seekg(0);
 	os << ss.str();
+#ifdef TEST_PARSER
+	p.reset();
+	ofstream parsertest("parsertest.tex");
+	while (p.good())
+		parsertest << p.get_token().asInput();
+	// <origfile> and parsertest.tex should now have identical content
+#endif
 }
 
 
Index: src/tex2lyx/tex2lyx.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/tex2lyx.h,v
retrieving revision 1.11
diff -u -p -r1.11 tex2lyx.h
--- src/tex2lyx/tex2lyx.h	2003/10/23 11:46:33	1.11
+++ src/tex2lyx/tex2lyx.h	2003/11/04 17:25:03
@@ -46,7 +46,6 @@ void handle_tabular(Parser & p, std::ost
 
 
 /// in tex2lyx.C
-void handle_comment(Parser & p);
 std::string const trim(std::string const & a, char const * p = " \t\n\r");
 
 void split(std::string const & s, std::vector<std::string> & result,
Index: src/tex2lyx/texparser.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.C,v
retrieving revision 1.22
diff -u -p -r1.22 texparser.C
--- src/tex2lyx/texparser.C	2003/10/23 11:46:33	1.22
+++ src/tex2lyx/texparser.C	2003/11/04 17:25:03
@@ -28,17 +28,6 @@ namespace {
 
 CatCode theCatcode[256];
 
-void skipSpaceTokens(istream & is, char c)
-{
-	// skip trailing spaces
-	while (catcode(c) == catSpace || catcode(c) == catNewline)
-		if (!is.get(c))
-			break;
-	//cerr << "putting back: " << c << "\n";
-	is.putback(c);
-}
-
-
 void catInit()
 {
 	fill(theCatcode, theCatcode + 256, catOther);
@@ -95,12 +84,16 @@ CatCode catcode(unsigned char c)
 
 ostream & operator<<(ostream & os, Token const & t)
 {
-	if (t.cs().size())
+	if (t.cat() == catComment)
+		os << '%' << t.cs() << '\n';
+	else if (t.cat() == catSpace)
+		os << t.cs();
+	else if (t.cat() == catEscape)
 		os << '\\' << t.cs() << ' ';
 	else if (t.cat() == catLetter)
 		os << t.character();
 	else if (t.cat() == catNewline)
-		os << "[\\n," << t.cat() << "]\n";
+		os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
 	else
 		os << '[' << t.character() << ',' << t.cat() << ']';
 	return os;
@@ -115,7 +108,11 @@ string Token::asString() const
 
 string Token::asInput() const
 {
-	return char_ ? string(1, char_) : '\\' + cs_ + ' ';
+	if (cat_ == catComment)
+		return '%' + cs_ + '\n';
+	if (cat_ == catSpace || cat_ == catNewline)
+		return cs_;
+	return char_ ? string(1, char_) : '\\' + cs_;
 }
 
 
@@ -154,6 +151,13 @@ void Parser::pop_back()
 Token const & Parser::prev_token() const
 {
 	static const Token dummy;
+	return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
+}
+
+
+Token const & Parser::curr_token() const
+{
+	static const Token dummy;
 	return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
 }
 
@@ -173,20 +177,41 @@ Token const & Parser::get_token()
 }
 
 
-void Parser::skip_spaces()
+void Parser::skip_spaces(bool skip_comments)
 {
-	while (1) {
-		if (next_token().cat() == catSpace || next_token().cat() == catNewline)
+	// We just silently return if we have no more tokens.
+	// skip_spaces() should be callable at any time,
+	// the caller must check p::good() anyway.
+	while (good()) {
+		if ( next_token().cat() == catSpace ||
+		    (next_token().cat() == catNewline && next_token().cs().size() == 1) ||
+		     next_token().cat() == catComment && next_token().cs().empty())
 			get_token();
-		else if (next_token().cat() == catComment)
-			while (next_token().cat() != catNewline)
-				get_token();
+		else if (skip_comments && next_token().cat() == catComment)
+			cerr << "  Ignoring comment: " << get_token().asInput();
 		else
 			break;
 	}
 }
 
 
+void Parser::unskip_spaces(bool skip_comments)
+{
+	while (pos_ > 0) {
+		if ( curr_token().cat() == catSpace ||
+		    (curr_token().cat() == catNewline && curr_token().cs().size() == 1))
+			putback();
+		else if (skip_comments && curr_token().cat() == catComment) {
+			// TODO: Get rid of this
+			cerr << "Unignoring comment: " << curr_token().asInput();
+			putback();
+		}
+		else
+			break;
+	}
+}
+
+
 void Parser::putback()
 {
 	--pos_;
@@ -209,7 +234,12 @@ char Parser::getChar()
 
 string Parser::getArg(char left, char right)
 {
-	skip_spaces();
+	skip_spaces(true);
+
+	// This is needed if a partial file ends with a command without arguments,
+	// e. g. \medskip
+	if (! good())
+		return string();
 
 	string result;
 	char c = getChar();
@@ -217,8 +247,17 @@ string Parser::getArg(char left, char ri
 	if (c != left)
 		putback();
 	else
-		while ((c = getChar()) != right && good())
-			result += c;
+		while ((c = getChar()) != right && good()) {
+			// Ignore comments
+			if (curr_token().cat() == catComment) {
+				if (curr_token().cs().size())
+					cerr << "Ignoring comment: " << curr_token().asInput();
+			}
+			else if (curr_token().cat() == catSpace || curr_token().cat() == catNewline)
+				result += curr_token().cs();
+			else
+				result += c;
+		}
 
 	return result;
 }
@@ -245,34 +284,39 @@ void Parser::tokenize(istream & is)
 		//cerr << "reading c: " << c << "\n";
 
 		switch (catcode(c)) {
+			case catSpace: {
+				string s(1, c);
+				while (is.get(c) && catcode(c) == catSpace)
+					s += c;
+				if (catcode(c) != catSpace)
+					is.putback(c);
+				push_back(Token(s, catSpace));
+				break;
+			}
+
 			case catNewline: {
 				++lineno_;
-				is.get(c);
-				if (catcode(c) == catNewline) {
-					//do {
-						is.get(c);
-					//} while (catcode(c) == catNewline);
-					push_back(Token("par"));
-				} else {
-					push_back(Token('\n', catNewline));
+				string s(1, c);
+				while (is.get(c) && catcode(c) == catNewline) {
+					++lineno_;
+					s += c;
 				}
-				is.putback(c);
+				if (catcode(c) != catNewline)
+					is.putback(c);
+				push_back(Token(s, catNewline));
 				break;
 			}
 
 			case catComment: {
-				push_back(Token(c, catComment));
+				// We don't treat "%\n" combinations here specially because
+				// we want to preserve them in the preamble
+				string s;
 				while (is.get(c) && catcode(c) != catNewline)
-					push_back(Token(c, catLetter));
-				push_back(Token(c, catNewline));
+					s += c;
+				// Note: The '%' at the beginning and the '\n' at the end
+				// of the comment are not stored.
 				++lineno_;
-				is.get(c);
-				if (catcode(c) == catNewline) {
-					push_back(Token("par"));
-					++lineno_;
-				} else {
-					is.putback(c);
-				}
+				push_back(Token(s, catComment));
 				break;
 			}
 
@@ -286,18 +330,11 @@ void Parser::tokenize(istream & is)
 						// collect letters
 						while (is.get(c) && catcode(c) == catLetter)
 							s += c;
-						skipSpaceTokens(is, c);
+						if (catcode(c) != catLetter)
+							is.putback(c);
 					}
-					push_back(Token(s));
+					push_back(Token(s, catEscape));
 				}
-				break;
-			}
-
-			case catSuper:
-			case catSub: {
-				push_back(Token(c, catcode(c)));
-				is.get(c);
-				skipSpaceTokens(is, c);
 				break;
 			}
 
Index: src/tex2lyx/texparser.h
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/texparser.h,v
retrieving revision 1.15
diff -u -p -r1.15 texparser.h
--- src/tex2lyx/texparser.h	2003/10/23 11:46:33	1.15
+++ src/tex2lyx/texparser.h	2003/11/04 17:25:04
@@ -75,7 +75,7 @@ public:
 	///
 	Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
 	///
-	Token(std::string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
+	Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
 
 	///
 	std::string const & cs() const { return cs_; }
@@ -100,9 +100,16 @@ private:
 std::ostream & operator<<(std::ostream & os, Token const & t);
 
 
-//
-// Actual parser class
-//
+/*!
+ * Actual parser class
+ *
+ * The parser parses every character of the inputstream into a token
+ * and classifies the token.
+ * The following transformations are done:
+ * - Consecutive spaces are combined into one single token with CatCode catSpace
+ * - Consecutive newlines are combined into one single token with CatCode catNewline
+ * - Comments and %\n combinations are parsed into one token with CatCode catComment
+ */
 
 class Parser {
 
@@ -136,11 +143,15 @@ public:
 	///
 	Token const & prev_token() const;
 	///
-	Token const & next_token() const;
+	Token const & curr_token() const;
 	///
+	Token const & next_token() const;
+	/// Make the next token current and return that.
 	Token const & get_token();
-	/// skips spaces if any
-	void skip_spaces();
+	/// skips spaces (and comments if \param skip_comments is true)
+	void skip_spaces(bool skip_comments = false);
+	/// puts back spaces (and comments if \param skip_comments is true)
+	void unskip_spaces(bool skip_comments = false);
 	///
 	void lex(std::string const & s);
 	///
@@ -156,7 +167,7 @@ public:
 	///
 	CatCode getCatCode(char c) const;
 
-//private:
+private:
 	///
 	int lineno_;
 	///
Index: src/tex2lyx/text.C
===================================================================
RCS file: /cvs/lyx/lyx-devel/src/tex2lyx/text.C,v
retrieving revision 1.26
diff -u -p -r1.26 text.C
--- src/tex2lyx/text.C	2003/11/03 17:47:28	1.26
+++ src/tex2lyx/text.C	2003/11/04 17:25:06
@@ -170,8 +170,30 @@ void skip_braces(Parser & p)
 }
 
 
-void handle_ert(ostream & os, string const & s, Context const & context)
+void handle_ert(ostream & os, string const & s, Context & context, bool check_layout = true)
 {
+	if (check_layout) {
+		// We must have a valid layout before outputting the ERT inset.
+		context.check_layout(os);
+	}
+	Context newcontext(true, context.textclass);
+	begin_inset(os, "ERT");
+	os << "\nstatus Collapsed\n";
+	newcontext.check_layout(os);
+	for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
+		if (*it == '\\')
+			os << "\n\\backslash \n";
+		else
+			os << *it;
+	}
+	newcontext.check_end_layout(os);
+	end_inset(os);
+}
+
+
+void handle_comment(ostream & os, string const & s, Context & context)
+{
+	// TODO: Handle this better
 	Context newcontext(true, context.textclass);
 	begin_inset(os, "ERT");
 	os << "\nstatus Collapsed\n";
@@ -182,6 +204,8 @@ void handle_ert(ostream & os, string con
 		else
 			os << *it;
 	}
+	// make sure that our comment is the last thing on the line
+	os << "\n\\newline";
 	newcontext.check_end_layout(os);
 	end_inset(os);
 }
@@ -217,10 +241,11 @@ void output_command_layout(ostream & os,
 	context.check_deeper(os);
 	context.check_layout(os);
 	if (context.layout->optionalargs > 0) {
+		p.skip_spaces();
 		if (p.next_token().character() == '[') {
 			p.get_token(); // eat '['
 			begin_inset(os, "OptArg\n");
-			os << "collapsed true\n";
+			os << "collapsed true\n\n";
 			parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
 			end_inset(os);
 		}
@@ -228,9 +253,47 @@ void output_command_layout(ostream & os,
 	parse_text_snippet(p, os, FLAG_ITEM, outer, context);
 	context.check_end_layout(os);
 	context.check_end_deeper(os);
+	// We don't really need a new paragraph, but
+	// we must make sure that the next item gets a \begin_layout.
+	parent_context.new_paragraph(os);
 }
 
 
+/*!
+ * Output a space if necessary.
+ * This function gets called for every whitespace token.
+ *
+ * We have three cases here:
+ * 1. A space must be suppressed. Example: The lyxcode case below
+ * 2. A space may be suppressed. Example: Spaces before "\par"
+ * 3. A space must not be suppressed. Example: A space between two words
+ *
+ * We currently handle only 1. and 3 and from 2. only the case of
+ * spaces before newlines as a side effect.
+ *
+ * 2. could be used to suppress as many spaces as possible. This has two effects:
+ * - Reimporting LyX generated LaTeX files changes almost no whitespace
+ * - Superfluous whitespace from non LyX generated LaTeX files is removed.
+ * The drawback is that the logic inside the function becomes
+ * complicated, and that is the reason why it is not implemented.
+ */
+void check_space(Parser const & p, ostream & os, Context & context)
+{
+	Token const next = p.next_token();
+	Token const curr = p.curr_token();
+	// A space before a single newline and vice versa must be ignored
+	// LyX emits a newline before \end{lyxcode}.
+	// This newline must be ignored,
+	// otherwise LyX will add an additional protected space.
+	if (next.cat() == catSpace ||
+	    next.cat() == catNewline ||
+	    (next.cs() == "end" && context.layout->free_spacing && curr.cat() == catNewline)) {
+		return;
+	}
+	context.check_layout(os);
+	os << ' ';
+}
+
 void parse_environment(Parser & p, ostream & os, bool outer,
 		       Context & parent_context)
 {
@@ -239,6 +302,8 @@ void parse_environment(Parser & p, ostre
 	const bool is_starred = suffixIs(name, '*');
 	string const unstarred_name = rtrim(name, "*");
 	active_environments.push_back(name);
+	p.skip_spaces();
+
 	if (is_math_env(name)) {
 		parent_context.check_layout(os);
 		begin_inset(os, "Formula ");
@@ -262,13 +327,15 @@ void parse_environment(Parser & p, ostre
 			os << "placement " << p.getArg('[', ']') << '\n';
 		}
 		os << "wide " << tostr(is_starred)
-		   << "\ncollapsed false\n";
+		   << "\ncollapsed false\n\n";
 		parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
 		end_inset(os);
+		// We don't really need a new paragraph, but
+		// we must make sure that the next item gets a \begin_layout.
+		parent_context.new_paragraph(os);
 	}
 
 	else if (name == "minipage") {
-		parent_context.check_layout(os);
 		string position = "1";
 		string inner_pos = "0";
 		string height = "0pt";
@@ -293,8 +360,8 @@ void parse_environment(Parser & p, ostre
 				if (p.next_token().asInput() == "[") {
 					latex_inner_pos = p.getArg('[', ']');
 					switch(latex_inner_pos[0]) {
-					case 't': inner_pos = "0"; break;
-					case 'c': inner_pos = "1"; break;
+					case 'c': inner_pos = "0"; break;
+					case 't': inner_pos = "1"; break;
 					case 'b': inner_pos = "2"; break;
 					case 's': inner_pos = "3"; break;
 					default:
@@ -318,11 +385,11 @@ void parse_environment(Parser & p, ostre
 				ss << '[' << latex_inner_pos << ']';
 			ss << "{" << width << "}";
 			handle_ert(os, ss.str(), parent_context);
-			parent_context.check_end_layout(os);
-			parent_context.need_layout = true;
+			parent_context.new_paragraph(os);
 			parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
 			handle_ert(os, "\\end{minipage}", parent_context);
 		} else {
+			parent_context.check_layout(os);
 			begin_inset(os, "Minipage\n");
 			os << "position " << position << '\n';
 			os << "inner_position " << inner_pos << '\n';
@@ -332,11 +399,27 @@ void parse_environment(Parser & p, ostre
 			parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
 			end_inset(os);
 		}
-
 	}
 
-	else if (name == "center") {
+	// Alignment settings
+	else if (name == "center" || name == "flushleft" || name == "flushright" ||
+	         name == "centering" || name == "raggedright" || name == "raggedleft") {
+		// We must begin a new paragraph if not already done
+		if (! parent_context.atParagraphStart()) {
+			parent_context.check_end_layout(os);
+			parent_context.new_paragraph(os);
+		}
+		if (name == "flushleft" || name == "raggedright")
+			parent_context.extra_stuff += "\\align left ";
+		else if (name == "flushright" || name == "raggedleft")
+			parent_context.extra_stuff += "\\align right ";
+		else
+			parent_context.extra_stuff += "\\align center ";
 		parse_text(p, os, FLAG_END, outer, parent_context);
+		// Just in case the environment is empty ..
+		parent_context.extra_stuff.erase();
+		// We must begin a new paragraph to reset the alignment
+		parent_context.new_paragraph(os);
 	}
 
 	// The single '=' is meant here.
@@ -349,9 +432,11 @@ void parse_environment(Parser & p, ostre
 		case  LATEX_LIST_ENVIRONMENT:
 			context.extra_stuff = "\\labelwidthstring "
 				+ p.verbatim_item() + '\n';
+			p.skip_spaces();
 			break;
 		case  LATEX_BIB_ENVIRONMENT:
 			p.verbatim_item(); // swallow next arg
+			p.skip_spaces();
 			break;
 		default:
 			break;
@@ -360,6 +445,7 @@ void parse_environment(Parser & p, ostre
 		parse_text(p, os, FLAG_END, outer, context);
 		context.check_end_layout(os);
 		context.check_end_deeper(os);
+		parent_context.new_paragraph(os);
 	}
 
 	else if (name == "appendix") {
@@ -383,19 +469,20 @@ void parse_environment(Parser & p, ostre
 
 	else if (name == "tabbing") {
 		// We need to remember that we have to handle '\=' specially
-		parent_context.check_layout(os);
 		handle_ert(os, "\\begin{" + name + "}", parent_context);
 		parse_text_snippet(p, os, FLAG_END | FLAG_TABBING, outer, parent_context);
 		handle_ert(os, "\\end{" + name + "}", parent_context);
 	}
 
 	else {
-		parent_context.check_layout(os);
 		handle_ert(os, "\\begin{" + name + "}", parent_context);
 		parse_text_snippet(p, os, FLAG_END, outer, parent_context);
 		handle_ert(os, "\\end{" + name + "}", parent_context);
 	}
+
 	active_environments.pop_back();
+	if (name != "math")
+		p.skip_spaces();
 }
 
 } // anonymous namespace
@@ -485,9 +572,10 @@ void parse_text(Parser & p, ostream & os
 			skip_braces(p);
 		}
 
+		else if (t.cat() == catSpace || (t.cat() == catNewline && t.cs().size() == 1))
+			check_space(p, os, context);
 
 		else if (t.cat() == catLetter ||
-			       t.cat() == catSpace ||
 			       t.cat() == catOther ||
 			       t.cat() == catAlign ||
 			       t.cat() == catParameter) {
@@ -495,16 +583,9 @@ void parse_text(Parser & p, ostream & os
 			os << t.character();
 		}
 
-		else if (t.cat() == catNewline) {
-			if (p.next_token().cat() == catNewline) {
-				// this should have been be done by
-				// the parser already
-				cerr << "what are we doing here?" << endl;
-				p.get_token();
-				context.need_layout = true;
-			} else {
-				os << " "; // note the space
-			}
+		else if (t.cat() == catNewline || (t.cat() == catEscape && t.cs() == "par")) {
+			p.skip_spaces();
+			context.new_paragraph(os);
 		}
 
 		else if (t.cat() == catActive) {
@@ -519,20 +600,19 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cat() == catBegin) {
-// FIXME???
 			// special handling of size changes
 			context.check_layout(os);
 			bool const is_size = is_known(p.next_token().cs(), known_sizes);
-			Context newcontext(false, context.textclass);
-//			need_end_layout = false;
-			string const s = parse_text(p, FLAG_BRACE_LAST, outer, newcontext);
-//			need_end_layout = true;
-			if (s.empty() && p.next_token().character() == '`')
-				; // ignore it in  {}``
+			Token const prev = p.prev_token();
+			string const s = parse_text(p, FLAG_BRACE_LAST, outer, context);
+			if (s.empty() && (p.next_token().character() == '`' ||
+			                  (prev.character() == '-' && p.next_token().character())))
+				; // ignore it in {}`` or -{}-
 			else if (is_size || s == "[" || s == "]" || s == "*")
 				os << s;
 			else {
-				handle_ert(os, "{", context);
+				handle_ert(os, "{", context, false);
+				// s will end the current layout and begin a new one if necessary
 				os << s;
 				handle_ert(os, "}", context);
 			}
@@ -540,15 +620,26 @@ void parse_text(Parser & p, ostream & os
 
 		else if (t.cat() == catEnd) {
 			if (flags & FLAG_BRACE_LAST) {
-				context.check_end_layout(os);
 				return;
 			}
 			cerr << "stray '}' in text\n";
 			handle_ert(os, "}", context);
 		}
 
-		else if (t.cat() == catComment)
-			handle_comment(p);
+		else if (t.cat() == catComment) {
+			context.check_layout(os);
+			if (t.cs().size()) {
+				handle_comment(os, '%' + t.cs(), context);
+				if (p.next_token().cat() == catNewline) {
+					// A newline after a comment line starts a new paragraph
+					context.new_paragraph(os);
+					p.skip_spaces();
+				}
+			} else {
+				// "%\n" combination
+				p.skip_spaces();
+			}
+		}
 
 		//
 		// control sequences
@@ -588,8 +679,7 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "item") {
-			// should be done automatically by Parser::tokenize
-			//p.skip_spaces();
+			p.skip_spaces();
 			string s;
 			bool optarg = false;
 			if (p.next_token().character() == '[') {
@@ -598,11 +688,10 @@ void parse_text(Parser & p, ostream & os
 				s = parse_text(p, FLAG_BRACK_LAST, outer, newcontext);
 				optarg = true;
 			}
-			context.need_layout = true;
-			context.has_item = true;
+			context.set_item();
 			context.check_layout(os);
 			if (optarg) {
-				if (active_environment() == "itemize") {
+				if (context.layout->labeltype != LABEL_MANUAL) {
 					// lyx does not support \item[\mybullet] in itemize environments
 					handle_ert(os, "[", context);
 					os << s;
@@ -610,13 +699,13 @@ void parse_text(Parser & p, ostream & os
 				} else if (s.size()) {
 					// The space is needed to separate the item from the rest of the sentence.
 					os << s << ' ';
+					p.skip_spaces();
 				}
 			}
 		}
 
 		else if (t.cs() == "bibitem") {
-			context.need_layout = true;
-			context.has_item = true;
+			context.set_item();
 			context.check_layout(os);
 			os << "\\bibitem ";
 			os << p.getOpt();
@@ -624,6 +713,7 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "def") {
+			p.skip_spaces();
 			context.check_layout(os);
 			string name = p.get_token().cs();
 			while (p.next_token().cat() != catBegin)
@@ -631,20 +721,14 @@ void parse_text(Parser & p, ostream & os
 			handle_ert(os, "\\def\\" + name + '{' + p.verbatim_item() + '}', context);
 		}
 
-		else if (t.cs() == "par") {
+		else if (t.cs() == "noindent") {
 			p.skip_spaces();
-			context.check_end_layout(os);
-			context.need_layout = true;
+			context.extra_stuff += "\\noindent ";
 		}
 
 		else if (t.cs() == "appendix") {
-			context.check_end_layout(os);
-			Context newcontext(true, context.textclass, context.layout,
-					context.layout);
-			newcontext.check_layout(os);
-			os << "\\start_of_appendix\n";
-			parse_text(p, os, FLAG_END, outer, newcontext);
-			newcontext.check_end_layout(os);
+			p.skip_spaces();
+			context.extra_stuff += "\\start_of_appendix ";
 		}
 
 		// Must attempt to parse "Section*" before "Section".
@@ -655,12 +739,14 @@ void parse_text(Parser & p, ostream & os
 			 newlayout->isCommand()) {
 			p.get_token();
 			output_command_layout(os, p, outer, context, newlayout);
+			p.skip_spaces();
 		}
 
 		// The single '=' is meant here.
 		else if ((newlayout = findLayout(context.textclass, t.cs())).get() &&
 			 newlayout->isCommand()) {
 			output_command_layout(os, p, outer, context, newlayout);
+			p.skip_spaces();
 		}
 
 		else if (t.cs() == "includegraphics") {
@@ -763,22 +849,25 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "footnote") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "Foot\n");
-			os << "collapsed true\n";
+			os << "collapsed true\n\n";
 			parse_text_in_inset(p, os, FLAG_ITEM, false, context);
 			end_inset(os);
 		}
 
 		else if (t.cs() == "marginpar") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "Marginal\n");
-			os << "collapsed true\n";
+			os << "collapsed true\n\n";
 			parse_text_in_inset(p, os, FLAG_ITEM, false, context);
 			end_inset(os);
 		}
 
 		else if (t.cs() == "ensuremath") {
+			p.skip_spaces();
 			context.check_layout(os);
 			Context newcontext(false, context.textclass);
 			string s = parse_text(p, FLAG_ITEM, false, newcontext);
@@ -793,12 +882,16 @@ void parse_text(Parser & p, ostream & os
 			context.check_layout(os);
 			os << "\n\\hfill\n";
 			skip_braces(p);
+			p.skip_spaces();
 		}
 
-		else if (t.cs() == "makeindex" || t.cs() == "maketitle")
+		else if (t.cs() == "makeindex" || t.cs() == "maketitle") {
+			p.skip_spaces();
 			skip_braces(p); // swallow this
+		}
 
 		else if (t.cs() == "tableofcontents") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "LatexCommand \\tableofcontents\n");
 			end_inset(os);
@@ -806,6 +899,7 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "listoffigures") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "FloatList figure\n");
 			end_inset(os);
@@ -813,6 +907,7 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "listoftables") {
+			p.skip_spaces();
 			context.check_layout(os);
 			begin_inset(os, "FloatList table\n");
 			end_inset(os);
@@ -820,6 +915,7 @@ void parse_text(Parser & p, ostream & os
 		}
 
 		else if (t.cs() == "listof") {
+			p.skip_spaces(true);
 			string const name = p.get_token().asString();
 			if (context.textclass.floats().typeExist(name)) {
 				context.check_layout(os);
@@ -906,6 +1002,7 @@ void parse_text(Parser & p, ostream & os
 
 		else if (is_known(t.cs(), known_quotes)) {
 			char const ** where = is_known(t.cs(), known_quotes);
+			context.check_layout(os);
 			begin_inset(os, "Quotes ");
 			os << known_coded_quotes[where - known_quotes];
 			end_inset(os);
@@ -916,6 +1013,7 @@ void parse_text(Parser & p, ostream & os
 			char const ** where = is_known(t.cs(), known_sizes);
 			context.check_layout(os);
 			os << "\n\\size " << known_coded_sizes[where - known_sizes] << "\n";
+			p.skip_spaces();
 		}
 
 		else if (t.cs() == "LyX" || t.cs() == "TeX"
@@ -1096,6 +1194,35 @@ void parse_text(Parser & p, ostream & os
 			end_inset(os);
 		}
 
+		else if ( t.cs() == "smallskip" ||
+		          t.cs() == "medskip" ||
+			  t.cs() == "bigskip" ||
+			  t.cs() == "vfill" ||
+		         (t.cs() == "vspace" && p.next_token().asInput() != "*")) {
+			string arg;
+			if (t.cs() == "vspace")
+				arg = p.getArg('{', '}');
+			else
+				arg = t.cs();
+			// We may only add the vspace to the current context if the
+			// current paragraph is not empty.
+			if (context.atParagraphStart()
+			    && (p.next_token().cat() != catNewline || p.next_token().cs().size() == 1)
+			    && (! (p.next_token().cat() == catEscape && p.next_token().cs() == "end"))
+			    && (! (p.next_token().cat() == catEscape && p.next_token().cs() == "par"))) {
+				context.extra_stuff += "\\added_space_top " + arg + " ";
+				p.skip_spaces();
+			} else {
+				if (t.cs() == "vspace")
+					handle_ert(os, t.asInput() + '{' + arg + '}', context);
+				else
+					handle_ert(os, t.asInput(), context);
+			}
+			// Would be nice to recognize added_space_bottom too...
+			// At the moment this is parsed as added_space_top of the
+			// next paragraph.
+		}
+
 		else if (t.cs() == "psfrag") {
 			// psfrag{ps-text}[ps-pos][tex-pos]{tex-text}
 			// TODO: Generalize this!
@@ -1103,7 +1230,6 @@ void parse_text(Parser & p, ostream & os
 			arguments += '}';
 			arguments += p.getOpt();
 			arguments += p.getOpt();
-			p.skip_spaces();
 			handle_ert(os, "\\psfrag{" + arguments, context);
 		}
 
@@ -1122,7 +1248,13 @@ void parse_text(Parser & p, ostream & os
 			handle_ert(os, s + ' ', context);
 			*/
 			context.check_layout(os);
-			handle_ert(os, t.asInput() + ' ', context);
+			string name = t.asInput();
+			if (p.next_token().asInput() == "*") {
+				// Starred commands like \vspace*{}
+				p.get_token();				// Eat '*'
+				name += '*';
+			}
+			handle_ert(os, name, context);
 		}
 
 		if (flags & FLAG_LEAVE) {

Re: [patch] tex2lyx whitespace changes

Reply via email to