[patch] partial fix for bug 220

Georg Baum Sat, 11 Dec 2004 12:49:05 -0800

The attached patch adds partial longtable support and some small fixes to 
tex2lyx. This is needed for bug 220 (gnumeric table import). Double 
hlines (firstHeadTopDL etc. in LyX) are not supported yet. The plan is to add 
them in a second step. That will also fix double hlines in normal tables.
I copied some structs from tabular.h instead of including 
math_gridinfo.h, because I needed to modify them. I guess that these 
(and some common code) could go to a common base class for tex2lyx table 
stuff and LyXTabular when the table import is "finished", but for now I 
don't want to touch tabular.C too much.
I had to rearrange the code quite a bit, but I think that it is 
understandable now.
OK to commit?



Georg

diff -p -r -U 3 -X excl.tmp lyx-1.4-clean/src/tex2lyx/ChangeLog lyx-1.4-cvs/src/tex2lyx/ChangeLog
--- lyx-1.4-clean/src/tex2lyx/ChangeLog	2004-12-06 20:39:12.000000000 +0100
+++ lyx-1.4-cvs/src/tex2lyx/ChangeLog	2004-12-11 20:45:44.000000000 +0100
@@ -1,3 +1,17 @@
+2004-12-11  Georg Baum  <[EMAIL PROTECTED]>
+
+	* table.C (ColInfo, LTRowType, RowInfo, CellInfo, verbose_align):
+	new, extended from math_gridinfo.h and tabular.h
+	* table.C (write_attribute): new, from tabular.C
+	* table.C (read_hlines): remove
+	* table.C (parse_hlines): new
+	* table.C (RowPosition): new
+	* table.C (parse_table): handle comments in hlines
+	* table.C (parse_table, handle_tabular): add longtable support
+	* table.C (handle_tabular): fix "unexpected dummy size" error message
+	* table.C (handle_tabular): fix off by one error for clines
+	* text.C (parse_environment): add longtable support
+
 2004-12-05  Lars Gullik Bjønnes  <[EMAIL PROTECTED]>
 
 	* Makefile.am (tex2lyx_LDADD): use boost variables
diff -p -r -U 3 -X excl.tmp lyx-1.4-clean/src/tex2lyx/table.C lyx-1.4-cvs/src/tex2lyx/table.C
--- lyx-1.4-clean/src/tex2lyx/table.C	2004-11-27 14:44:28.000000000 +0100
+++ lyx-1.4-cvs/src/tex2lyx/table.C	2004-12-11 20:49:07.000000000 +0100
@@ -5,6 +5,7 @@
  *
  * \author André Pönitz
  * \author Jean-Marc Lasgouttes
+ * \author Georg Baum
  *
  * Full author contact details are available in file CREDITS.
  */
@@ -30,7 +36,6 @@ using std::ostringstream;
 using std::string;
 using std::vector;
 
-#include "mathed/math_gridinfo.h"
 
 // filled in preamble.C
 std::map<char, int> special_columns;
@@ -38,6 +43,98 @@ std::map<char, int> special_columns;
 
 namespace {
 
+struct ColInfo
+{
+	ColInfo() : align('c'), rightline(false), leftline(false) {}
+	/// column alignment
+	char align;
+	/// column width
+	string width;
+	/// special column alignment
+	string special;
+	/// how many lines on the right?
+	int rightline;
+	/// a line on the left?
+	bool leftline;
+};
+
+
+/// row type for longtables
+enum LTRowType
+{
+	/// normal row
+	LT_NORMAL,
+	/// part of head
+	LT_HEAD,
+	/// part of head on first page
+	LT_FIRSTHEAD,
+	/// part of foot
+	LT_FOOT,
+	/// part of foot on last page
+	LT_LASTFOOT
+};
+
+
+struct RowInfo
+{
+	RowInfo() : topline(false), bottomline(false), type(LT_NORMAL),
+	            newpage(false) {}
+	/// horizontal line above
+	bool topline;
+	/// horizontal line below
+	bool bottomline;
+	/// These are for longtabulars only
+	/// row type (head, foot, firsthead etc.)
+	LTRowType type;
+	/// row for a pagebreak
+	bool newpage;
+};
+
+
+struct CellInfo
+{
+	CellInfo() : multi(0), align('n'), leftline(false), rightline(false),
+	             topline(false), bottomline(false) {}
+	/// cell content
+	string content;
+	/// multicolumn flag
+	int multi;
+	/// cell alignment
+	char align;
+	/// do we have a line on the left?
+	bool leftline;
+	/// do we have a line on the right?
+	bool rightline;
+	/// do we have a line above?
+	bool topline;
+	/// do we have a line below?
+	bool bottomline;
+};
+
+
+/// translate a horizontal alignment (as stored in ColInfo and CellInfo) to LyX
+inline char const * verbose_align(char c)
+{
+	return c == 'c' ? "center" : c == 'r' ? "right" : c == 'l' ? "left" : "none";
+}
+
+
+// stripped down from tabular.C. We use it currently only for bools and
+// strings
+string const write_attribute(string const & name, bool const & b)
+{
+	// we write only true attribute values so we remove a bit of the
+	// file format bloat for tabulars.
+	return b ? ' ' + name + "=\"true\"" : string();
+}
+
+
+string const write_attribute(string const & name, string const & s)
+{
+	return s.empty() ? string() : ' ' + name + "=\"" + s + '"';
+}
+
+
 int string2int(string const & s, int deflt = 0)
 {
 	istringstream is(s);
@@ -47,27 +144,6 @@ int string2int(string const & s, int def
 }
 
 
-string read_hlines(Parser & p)
-{
-	ostringstream os;
-	p.skip_spaces();
-	while (p.good()) {
-		if (p.next_token().cs() == "hline") {
-			p.get_token();
-			os << "\\hline";
-		} else if (p.next_token().cs() == "cline") {
-			p.get_token();
-			os << "\\cline{" << p.verbatim_item() << "}";
-		} else
-			break;
-		p.skip_spaces();
-	};
-	//cerr << "read_hlines(), read: '" << os.str() << "'\n";
-	//cerr << "read_hlines(), next token: " << p.next_token() << "\n";
-	return os.str();
-}
-
-
 /* rather brutish way to code table structure in a string:
 
   \begin{tabular}{ccc}
@@ -78,16 +154,17 @@ string read_hlines(Parser & p)
 
  gets "translated" to:
 
-  1 TAB 2 TAB 3 LINE
-  \hline HLINE  TAB 5 LINE
-  5 TAB 7 LINE
+         HLINE 1 TAB 2               TAB 3 HLINE HLINE LINE
+  \hline HLINE \multicolumn{2}{c}{4} TAB 5 HLINE HLINE LINE
+         HLINE 6 TAB 7                     HLINE HLINE LINE
 */
 
 char const TAB   = '\001';
 char const LINE  = '\002';
 char const HLINE = '\004';
 
 
+/// handle column specifications for tabulars and multicolumns
 void handle_colalign(Parser & p, vector<ColInfo> & colinfo)
 {
 	if (p.get_token().cat() != catBegin)
@@ -167,13 +276,60 @@ void handle_colalign(Parser & p, vector<
 }
 
 
-} // anonymous namespace
+/*!
+ * Parse hlines and similar stuff.
+ * \returns whether the token \p t was parsed
+ */
+bool parse_hlines(Parser & p, Token const & t, string & hlines,
+                  bool is_long_tabular)
+{
+	BOOST_ASSERT(t.cat() == catEscape);
 
+	if (t.cs() == "hline")
+		hlines += "\\hline";
+
+	else if (t.cs() == "cline")
+		hlines += "\\cline{" + p.verbatim_item() + '}';
+
+	else if (is_long_tabular && t.cs() == "newpage")
+		hlines += "\\newpage";
+
+	else
+		return false;
+
+	return true;
+}
 
-void parse_table(Parser & p, ostream & os, unsigned flags)
+
+/// Position in a row
+enum RowPosition {
+	/// At the very beginning, before the first token
+	ROW_START,
+	/// After the first token and before any column token
+	IN_HLINES_START,
+	/// After the first column token. Comments and whitespace are only
+	/// treated as tokens in this position
+	IN_COLUMNS,
+	/// After the first non-column token at the end
+	IN_HLINES_END
+};
+
+
+/*!
+ * Parse table structure.
+ * We parse tables in a two-pass process: This function extracts the table
+ * structure (rows, columns, hlines etc.), but does not change the cell
+ * content. The cell content is parsed in a second step in handle_tabular().
+ */
+void parse_table(Parser & p, ostream & os, bool is_long_tabular,
+                 RowPosition & pos, unsigned flags)
 {
+	// table structure commands such as \hline
 	string hlines;
 
+	// comments that occur at places where we can't handle them
+	string comments;
+
 	while (p.good()) {
 		Token const & t = p.get_token();
 
@@ -181,6 +337,126 @@ void parse_table(Parser & p, ostream & o
 		cerr << "t: " << t << " flags: " << flags << "\n";
 #endif
 
+		// comments and whitespace in hlines
+		switch (pos) {
+		case ROW_START:
+		case IN_HLINES_START:
+		case IN_HLINES_END:
+			if (t.cat() == catComment) {
+				if (t.cs().empty())
+					// line continuation
+					p.skip_spaces();
+				else
+					// We can't handle comments here,
+					// store them for later use
+					comments += t.asInput();
+				continue;
+			} else if (t.cat() == catSpace ||
+			           t.cat() == catNewline) {
+				// whitespace is irrelevant here, we
+				// need to recognize hline stuff
+				p.skip_spaces();
+				continue;
+			}
+			break;
+		case IN_COLUMNS:
+			break;
+		}
+
+		// We need to handle structure stuff first in order to
+		// determine whether we need to output a HLINE separator
+		// before the row or not.
+		if (t.cat() == catEscape) {
+			if (parse_hlines(p, t, hlines, is_long_tabular)) {
+				switch (pos) {
+				case ROW_START:
+					pos = IN_HLINES_START;
+					break;
+				case IN_COLUMNS:
+					pos = IN_HLINES_END;
+					break;
+				case IN_HLINES_START:
+				case IN_HLINES_END:
+					break;
+				}
+				continue;
+			}
+
+			else if (t.cs() == "tabularnewline" ||
+			         t.cs() == "\\" ||
+			         t.cs() == "cr") {
+				if (t.cs() == "cr")
+					cerr << "Warning: Converting TeX "
+					        "'\\cr' to LaTeX '\\\\'."
+					     << endl;
+				// stuff before the line break
+				os << comments << HLINE << hlines << HLINE
+				   << LINE;
+				//cerr << "hlines: " << hlines << endl;
+				hlines.erase();
+				comments.erase();
+				pos = ROW_START;
+				continue;
+			}
+
+			else if (is_long_tabular &&
+			         (t.cs() == "endhead" ||
+			          t.cs() == "endfirsthead" ||
+			          t.cs() == "endfoot" ||
+			          t.cs() == "endlastfoot")) {
+				hlines += t.asInput();
+				switch (pos) {
+				case IN_COLUMNS:
+				case IN_HLINES_END:
+					// these commands are implicit line
+					// breaks
+					os << comments << HLINE << hlines
+					   << HLINE << LINE;
+					hlines.erase();
+					comments.erase();
+					pos = ROW_START;
+					break;
+				case ROW_START:
+					pos = IN_HLINES_START;
+					break;
+				case IN_HLINES_START:
+					break;
+				}
+				continue;
+			}
+
+		}
+
+		// We need a HLINE separator if we either have no hline
+		// stuff at all and are just starting a row or if we just
+		// got the first non-hline token.
+		switch (pos) {
+		case ROW_START:
+			// no hline tokens exist, first token at row start
+		case IN_HLINES_START:
+			// hline tokens exist, first non-hline token at row
+			// start
+			os << hlines << HLINE << comments;
+			hlines.erase();
+			comments.erase();
+			pos = IN_COLUMNS;
+			break;
+		case IN_HLINES_END:
+			// Oops, there is still cell content after hline
+			// stuff. This does not work in LaTeX, so we ignore
+			// the hlines.
+			cerr << "Ignoring '" << hlines << "' in a cell"
+			     << endl;
+			os << comments;
+			hlines.erase();
+			comments.erase();
+			pos = IN_COLUMNS;
+			break;
+		case IN_COLUMNS:
+			break;
+		}
+
+		// If we come here we have normal cell content
 		//
 		// cat codes
 		//
@@ -215,7 +491,8 @@ void parse_table(Parser & p, ostream & o
 
 		else if (t.cat() == catBegin) {
 			os << '{';
-			parse_table(p, os, FLAG_BRACE_LAST);
+			parse_table(p, os, is_long_tabular, pos,
+			            FLAG_BRACE_LAST);
 			os << '}';
 		}
 
@@ -230,23 +507,6 @@ void parse_table(Parser & p, ostream & o
 			p.skip_spaces();
 		}
 
-		else if (t.cs() == "tabularnewline" || t.cs() == "\\" ||
-		         t.cs() == "cr") {
-			if (t.cs() == "cr")
-				cerr << "Warning: Converting TeX '\\cr' to LaTeX '\\\\'."
-				     << endl;
-			// stuff before the line break
-			// and look ahead for stuff after the line break
-			os << HLINE << hlines << HLINE << LINE << read_hlines(p) << HLINE;
-			hlines.erase();
-		}
-
-		else if (t.cs() == "hline")
-			hlines += "\\hline";
-
-		else if (t.cs() == "cline")
-			hlines += "\\cline{" + p.verbatim_item() + '}';
-
 		else if (t.cat() == catComment)
 			os << t.asInput();
 
@@ -269,7 +529,8 @@ void parse_table(Parser & p, ostream & o
 			if (is_math_env(name)) {
 				parse_math(p, os, FLAG_END, MATH_MODE);
 			} else {
-				parse_table(p, os, FLAG_END);
+				parse_table(p, os, is_long_tabular, pos,
+				            FLAG_END);
 			}
 			os << "\\end{" << name << '}';
 			active_environments.pop_back();
@@ -290,6 +551,16 @@ void parse_table(Parser & p, ostream & o
 		else
 			os << t.asInput();
 	}
+
+	// We can have comments if the last line is incomplete
+	os << comments;
+
+	// We can have hline stuff if the last line is incomplete
+	if (!hlines.empty()) {
+		// this does not work in LaTeX, so we ignore it
+		cerr << "Ignoring '" << hlines << "' at end of tabular"
+		     << endl;
+	}
 }
 
 
@@ -309,27 +580,33 @@ void handle_hline_below(RowInfo & ri, ve
 }
 
 
-void handle_tabular(Parser & p, ostream & os,
+} // anonymous namespace
+
+
+void handle_tabular(Parser & p, ostream & os, bool is_long_tabular,
 		    Context & context)
 {
 	string posopts = p.getOpt();
-	if (!posopts.empty())
-		cerr << "vertical tabular positioning '" << posopts << "' ignored\n";
+	if (!posopts.empty()) {
+		if (is_long_tabular)
+			cerr << "horizontal longtable";
+		else
+			cerr << "vertical tabular";
+		cerr << " positioning '" << posopts << "' ignored\n";
+	}
 
 	vector<ColInfo>            colinfo;
 
 	// handle column formatting
 	handle_colalign(p, colinfo);
 
-	// handle initial hlines
-
 	// first scan of cells
 	// use table mode to keep it minimal-invasive
 	// not exactly what's TeX doing...
 	vector<string> lines;
 	ostringstream ss;
-	ss << read_hlines(p) << HLINE; // handle initial hlines
-	parse_table(p, ss, FLAG_END);
+	RowPosition rowpos = ROW_START;
+	parse_table(p, ss, is_long_tabular, rowpos, FLAG_END);
 	split(ss.str(), lines, LINE);
 
 	vector< vector<CellInfo> > cellinfo(lines.size());
@@ -348,8 +625,10 @@ void handle_tabular(Parser & p, ostream 
 		split(lines[row], dummy, HLINE);
 
 		// handle horizontal line fragments
+		// we only expect this for a last line without '\\'
 		if (dummy.size() != 3) {
-			if (dummy.size() != 1)
+			if ((dummy.size() != 1 && dummy.size() != 2) ||
+			    row != rowinfo.size() - 1)
 				cerr << "unexpected dummy size: " << dummy.size()
 					<< " content: " << lines[row] << "\n";
 			dummy.resize(3);
@@ -392,17 +671,17 @@ void handle_tabular(Parser & p, ostream 
 					if (from >= colinfo.size()) {
 						cerr << "cline starts at non "
 						        "existing column "
-						     << from << endl;
+						     << (from + 1) << endl;
 						from = colinfo.size() - 1;
 					}
-					size_t to = string2int(t[1]);
+					size_t to = string2int(t[1]) - 1;
 					if (to >= colinfo.size()) {
 						cerr << "cline ends at non "
 						        "existing column "
-						     << to << endl;
+						     << (to + 1) << endl;
 						to = colinfo.size() - 1;
 					}
-					for (size_t col = from; col < to; ++col) {
+					for (size_t col = from; col <= to; ++col) {
 						//cerr << "row: " << row << " col: " << col << " i: " << i << endl;
 						if (i == 0) {
 							rowinfo[row].topline = true;
@@ -412,6 +691,49 @@ void handle_tabular(Parser & p, ostream 
 							cellinfo[row][col].bottomline = true;
 						}
 					}
+				} else if (t.cs() == "endhead") {
+					if (i > 0)
+						rowinfo[row].type = LT_HEAD;
+					for (int r = row - 1; r >= 0; --r) {
+						if (rowinfo[r].type != LT_NORMAL)
+							break;
+						rowinfo[r].type = LT_HEAD;
+					}
+				} else if (t.cs() == "endfirsthead") {
+					if (i > 0)
+						rowinfo[row].type = LT_FIRSTHEAD;
+					for (int r = row - 1; r >= 0; --r) {
+						if (rowinfo[r].type != LT_NORMAL)
+							break;
+						rowinfo[r].type = LT_FIRSTHEAD;
+					}
+				} else if (t.cs() == "endfoot") {
+					if (i > 0)
+						rowinfo[row].type = LT_FOOT;
+					for (int r = row - 1; r >= 0; --r) {
+						if (rowinfo[r].type != LT_NORMAL)
+							break;
+						rowinfo[r].type = LT_FOOT;
+					}
+				} else if (t.cs() == "endlastfoot") {
+					if (i > 0)
+						rowinfo[row].type = LT_LASTFOOT;
+					for (int r = row - 1; r >= 0; --r) {
+						if (rowinfo[r].type != LT_NORMAL)
+							break;
+						rowinfo[r].type = LT_LASTFOOT;
+					}
+				} else if (t.cs() == "newpage") {
+					if (i == 0) {
+						if (row > 0)
+							rowinfo[row - 1].newpage = true;
+						else
+							cerr << "Ignoring "
+							        "'\\newpage' "
+							        "before rows."
+							     << endl;
+					} else
+						rowinfo[row].newpage = true;
 				} else {
 					cerr << "unexpected line token: " << t << endl;
 				}
@@ -498,33 +820,38 @@ void handle_tabular(Parser & p, ostream 
 	//cerr << "// output what we have\n";
 	// output what we have
 	os << "\n<lyxtabular version=\"3\" rows=\"" << rowinfo.size()
-		 << "\" columns=\"" << colinfo.size() << "\">\n"
-		 << "<features>\n";
+	   << "\" columns=\"" << colinfo.size() << "\">\n";
+	os << "<features"
+	   << write_attribute("islongtable", is_long_tabular)
+	   << ">\n";
 
 	//cerr << "// after header\n";
 	for (size_t col = 0; col < colinfo.size(); ++col) {
 		os << "<column alignment=\""
-		   << verbose_align(colinfo[col].align) << "\"";
-		os << " valignment=\"top\"";
-		if (colinfo[col].leftline)
-			os << " leftline=\"true\"";
-		if (colinfo[col].rightline)
-			os << " rightline=\"true\"";
-		if (!colinfo[col].width.empty())
-			os << " width=\"" << colinfo[col].width << "\"";
-		if (!colinfo[col].special.empty())
-			os << " special=\"" << colinfo[col].special << "\"";
-		os << ">\n";
+		   << verbose_align(colinfo[col].align) << "\""
+		   << " valignment=\"top\""
+		   << write_attribute("leftline", colinfo[col].leftline)
+		   << write_attribute("rightline", colinfo[col].rightline)
+		   << write_attribute("width", colinfo[col].width)
+		   << write_attribute("special", colinfo[col].special)
+		   << ">\n";
 	}
 	//cerr << "// after cols\n";
 
 	for (size_t row = 0; row < rowinfo.size(); ++row) {
-		os << "<row";
-		if (rowinfo[row].topline)
-			os << " topline=\"true\"";
-		if (rowinfo[row].bottomline)
-			os << " bottomline=\"true\"";
-		os << ">\n";
+		os << "<row"
+		   << write_attribute("topline", rowinfo[row].topline)
+		   << write_attribute("bottomline", rowinfo[row].bottomline)
+		   << write_attribute("endhead",
+		                      rowinfo[row].type == LT_HEAD)
+		   << write_attribute("endfirsthead",
+		                      rowinfo[row].type == LT_FIRSTHEAD)
+		   << write_attribute("endfoot",
+		                      rowinfo[row].type == LT_FOOT)
+		   << write_attribute("endlastfoot",
+		                      rowinfo[row].type == LT_LASTFOOT)
+		   << write_attribute("newpage", rowinfo[row].newpage)
+		   << ">\n";
 		for (size_t col = 0; col < colinfo.size(); ++col) {
 			CellInfo const & cell = cellinfo[row][col];
 			os << "<cell";
@@ -532,15 +859,11 @@ void handle_tabular(Parser & p, ostream 
 				os << " multicolumn=\"" << cell.multi << "\"";
 			os << " alignment=\"" << verbose_align(cell.align)
 			   << "\""
-			   << " valignment=\"top\"";
-			if (cell.topline)
-				os << " topline=\"true\"";
-			if (cell.bottomline)
-				os << " bottomline=\"true\"";
-			if (cell.leftline)
-				os << " leftline=\"true\"";
-			if (cell.rightline)
-				os << " rightline=\"true\"";
+			   << " valignment=\"top\""
+			   << write_attribute("topline", cell.topline)
+			   << write_attribute("bottomline", cell.bottomline)
+			   << write_attribute("leftline", cell.leftline)
+			   << write_attribute("rightline", cell.rightline);
 			//cerr << "\nrow: " << row << " col: " << col;
 			//if (cell.topline)
 			//	cerr << " topline=\"true\"";
diff -p -r -U 3 -X excl.tmp lyx-1.4-clean/src/tex2lyx/tex2lyx.h lyx-1.4-cvs/src/tex2lyx/tex2lyx.h
--- lyx-1.4-clean/src/tex2lyx/tex2lyx.h	2004-08-10 11:35:51.000000000 +0200
+++ lyx-1.4-cvs/src/tex2lyx/tex2lyx.h	2004-11-09 21:27:28.000000000 +0100
@@ -47,7 +47,8 @@ void parse_math(Parser & p, std::ostream
 
 
 /// in table.C
-void handle_tabular(Parser & p, std::ostream & os, Context & context);
+void handle_tabular(Parser & p, std::ostream & os, bool is_long_tabular,
+                    Context & context);
 
 
 /// in tex2lyx.C
diff -p -r -U 3 -X excl.tmp lyx-1.4-clean/src/tex2lyx/text.C lyx-1.4-cvs/src/tex2lyx/text.C
--- lyx-1.4-clean/src/tex2lyx/text.C	2004-10-21 15:57:36.000000000 +0200
+++ lyx-1.4-cvs/src/tex2lyx/text.C	2004-11-25 09:06:47.000000000 +0100
@@ -651,10 +782,10 @@ void parse_environment(Parser & p, ostre
 		end_inset(os);
 	}
 
-	else if (name == "tabular") {
+	else if (name == "tabular" || name == "longtable") {
 		parent_context.check_layout(os);
 		begin_inset(os, "Tabular ");
-		handle_tabular(p, os, parent_context);
+		handle_tabular(p, os, name == "longtable", parent_context);
 		end_inset(os);
 	}

[patch] partial fix for bug 220

Reply via email to