[patch] log file parsing reworked

Jürgen Spitzmüller Sat, 17 Feb 2007 22:52:17 -0800

I've further reworked the log file parsing in order to fix dependency 
checking.


Here's what I came up with. It's a good deal more complicated than the 
previous parsing (and more complicated than I wished). However, this version 
fixes all problems I'm aware of, including all sorts of filenames with spaces 
and line breaks within filenames (bug 1027). It also fixes the problem that 
some lines were not found, as I reported in another thread.

I'm pretty sure this will fix all cases of "LyX does not update properly after 
I have changed file/image x".

José, can this go in before the beta?

Jürgen

Index: src/LaTeX.C
===================================================================
--- src/LaTeX.C	(Revision 17235)
+++ src/LaTeX.C	(Arbeitskopie)
@@ -49,6 +49,7 @@
 using support::quoteName;
 using support::removeExtension;
 using support::rtrim;
+using support::rsplit;
 using support::split;
 using support::subst;
 using support::suffixIs;
@@ -722,10 +723,10 @@
 
 namespace {
 
-void handleFoundFile(string const & ff, DepTable & head)
+bool handleFoundFile(string const & ff, DepTable & head)
 {
 	// convert from native os path to unix path
-	string const foundfile = os::internal_path(trim(ff));
+	string foundfile = os::internal_path(trim(ff));
 
 	lyxerr[Debug::DEPEND] << "Found file: " << foundfile << endl;
 
@@ -744,17 +745,47 @@
 		// On initial insert we want to do the update at once
 		// since this file cannot be a file generated by
 		// the latex run.
-		FileName const absname(foundfile);
+		FileName absname(foundfile);
 		if (fs::exists(absname.toFilesystemEncoding()) &&
-		    !fs::is_directory(absname.toFilesystemEncoding()))
-			head.insert(absname, true);
+		    !fs::is_directory(absname.toFilesystemEncoding())) {
+				head.insert(absname, true);
+				return true;
+		} else {
+			// check for spaces
+			string strippedfile = foundfile;
+			while (contains(strippedfile, " ")) {
+				// strip off part after last space and try again
+				string tmp = strippedfile;
+				string const stripoff =
+					rsplit(tmp, strippedfile, ' ');
+				absname.set(strippedfile);
+				if (fs::exists(absname.toFilesystemEncoding()) &&
+				    !fs::is_directory(absname.toFilesystemEncoding())) {
+					head.insert(absname, true);
+					return true;
+				}
+			}
+		}
+	}
 
-		return;
+	string onlyfile = onlyFilename(foundfile);
+	FileName absname(makeAbsPath(onlyfile));
+
+	// check for spaces
+	while (contains(foundfile, " ")) {
+		if (fs::exists(absname.toFilesystemEncoding()))
+			// everything o.k.
+			break;
+		else {
+			// strip off part after last space and try again
+			string strippedfile;
+			string const stripoff = rsplit(foundfile, strippedfile, ' ');
+			foundfile = strippedfile;
+			onlyfile = onlyFilename(strippedfile);
+			absname = makeAbsPath(onlyfile);
+		}
 	}
 
-	string const onlyfile = onlyFilename(foundfile);
-	FileName const absname(makeAbsPath(onlyfile));
-
 	// (2) foundfile is in the tmpdir
 	//     insert it into head
 	if (fs::exists(absname.toFilesystemEncoding())) {
@@ -781,12 +812,26 @@
 				<< endl;
 			head.insert(absname);
 		}
-	} else
+		return true;
+	} else {
 		lyxerr[Debug::DEPEND]
 			<< "Not a file or we are unable to find it."
 			<< endl;
+		return false;
+	}
 }
 
+
+bool checkLineBreak(string const & ff, DepTable & head)
+{
+	if (contains(ff, '.'))
+		// if we have a dot, we let handleFoundFile decide
+		return handleFoundFile(ff, head);
+	else
+		// else, we suspect a line break
+		return false;
+}
+
 } // anon namespace
 
 
@@ -798,16 +843,17 @@
 
 	string const logfile = onlyFilename(changeExtension(file.absFilename(), ".log"));
 
-	static regex reg1(".*\\([^)]+.*");
-	static regex reg2("File: ([^ ]+).*");
-	static regex reg3("No file ([^ ]+)\\..*");
-	static regex reg4("\\\\openout[0-9]+.*=.*`([^ ]+)'\\..*");
+	static regex reg1("File: (.+).*");
+	static regex reg2("No file (.+)(.).*");
+	static regex reg3("\\\\openout[0-9]+.*=.*`(.+)(..).*");
 	// If an index should be created, MikTex does not write a line like
 	//    \openout# = 'sample.idx'.
 	// but instead only a line like this into the log:
 	//   Writing index file sample.idx
-	static regex reg5("Writing index file ([^ ]+).*");
-	static regex regnomencl("Writing nomenclature file ([^ ]+).*");
+	static regex reg4("Writing index file (.+).*");
+	// files also can be enclosed in <...>
+	static regex reg5("<([^>]+)(.).*");
+	static regex regnomencl("Writing nomenclature file (.+).*");
 	// If a toc should be created, MikTex does not write a line like
 	//    \openout# = `sample.toc'.
 	// but only a line like this into the log:
@@ -815,19 +861,18 @@
 	// This line is also written by tetex.
 	// This line is not present if no toc should be created.
 	static regex miktexTocReg("\\\\tf@toc=\\\\write.*");
+	static regex reg6(".*\\([^)]+.*");
 
 	FileName const fn(makeAbsPath(logfile));
 	ifstream ifs(fn.toFilesystemEncoding().c_str());
+	string lastline;
 	while (ifs) {
 		// Ok, the scanning of files here is not sufficient.
 		// Sometimes files are named by "File: xxx" only
 		// So I think we should use some regexps to find files instead.
-		// "\(([^ ()]+\.+[^ ()]+)" should match the "(file.ext " variant,
-		//  note that we can have several of these on one line.
-		// "File: ([^\.]+\.+[^ ]+).*" should match the "File: file.ext " variant.
-		// FIXME: sometimes, LaTeX inserts linebreaks while outputting
-		// file names. This case is not handled correctly (bug 1027).
+		// Note: all file names and paths might contain spaces.
 
+		bool found_file = false;
 		string token;
 		getline(ifs, token);
 		// MikTeX sometimes inserts \0 in the log file. They can't be
@@ -836,42 +881,132 @@
 		// \r's afterwards, since we need to remove them anyway.
 		token = subst(token, '\0', '\r');
 		token = subst(token, "\r", "");
-		if (token.empty())
+		if (token.empty() || token == ")") {
+			lastline = string();
 			continue;
+		}
 
+		// Sometimes, filenames are broken across lines.
+		// We care for that and save suspicious lines.
+		// Here we exclude some cases where we are sure 
+		// that there is no continued filename
+		if (prefixIs(token, "File:") || prefixIs(token, "(Font)")
+		    || prefixIs(token, "Package:") 
+		    || prefixIs(token, "Language:")
+		    || prefixIs(token, "LaTeX Font Info:") 
+		    || prefixIs(token, "\\openout[")
+		    || prefixIs(token, "))"))
+			lastline = string();
+
+		if (!lastline.empty())
+			// probably a continued filename from last line
+			token = lastline + token;
+
 		smatch sub;
 
 		// FIXME UNICODE: We assume that the file names in the log
 		// file are in the file system encoding.
 		token = to_utf8(from_filesystem8bit(token));
 
+		// (1) "File: file.ext"
 		if (regex_match(token, sub, reg1)) {
-			// search for strings in (...) that must not contain
-			// a blank, but must contain a dot
-			static regex reg1_1("\\(([^()]+\\.+[^ ()]+)");
+			// check for dot
+			found_file = checkLineBreak(sub.str(1), head);
+			// However, ...
+			if (suffixIs(token, ")"))
+				// no line break for sure
+				// pretend we've been successfully searching
+				found_file = true;
+		// (2) "No file file.ext"
+		} else if (regex_match(token, sub, reg2)) {
+			// file names must contain a dot, line ends with dot
+			if (contains(sub.str(1), '.') && sub.str(2) == ".")
+				found_file = handleFoundFile(sub.str(1), head);
+			else
+				// we suspect a line break
+				found_file = false;
+		// (3) "\openout<nr> = `file.ext'."
+		} else if (regex_match(token, sub, reg3)) {
+			// search for closing '. at the end of the line
+			if (sub.str(2) == "\'.")
+				found_file = handleFoundFile(sub.str(1), head);
+			else
+				// probable line break
+				found_file = false;
+		// (4) "Writing index file file.ext"
+		} else if (regex_match(token, sub, reg4))
+			// check for dot
+			found_file = checkLineBreak(sub.str(1), head);
+		// (5) "<file.ext>"
+		else if (regex_match(token, sub, reg5)) {
+			// search for closing '>' and dot ('*.*>') at the eol
+			if (contains(sub.str(1), '.') && sub.str(2) == ">")
+				found_file = handleFoundFile(sub.str(1), head);
+			else
+				// probable line break
+				found_file = false;
+		// (6) "Writing nomenclature file file.ext"
+		} else if (regex_match(token, sub, regnomencl))
+			// check for dot
+			found_file = checkLineBreak(sub.str(1), head);
+		// (7) "\tf@toc=\write<nr>" (for MikTeX)
+		else if (regex_match(token, sub, miktexTocReg))
+			found_file = handleFoundFile(onlyFilename(changeExtension(
+						file.absFilename(), ".toc")), head);
+		else
+			// not found, but we won't check further
+			// pretend we've been successfully searching
+			found_file = true;
+
+		// (8) "(file.ext"
+		// note that we can have several of these on one line
+		// this must be queried separately, because of
+		// cases such as "File: file.ext (type eps)"
+		// where "File: file.ext" would be skipped
+		if (regex_match(token, sub, reg6)) {
+			// search for strings in (...)
+			static regex reg6_1("\\(([^()]+)(.).*");
 			smatch what;
 			string::const_iterator first = token.begin();
 			string::const_iterator end = token.end();
 
-			while (regex_search(first, end, what, reg1_1)) {
-				first = what[0].second;
-				handleFoundFile(what.str(1), head);
+			while (regex_search(first, end, what, reg6_1)) {
+				// if we have a dot, try to handle as file
+				if (contains(what.str(1), '.')) {
+					first = what[0].second;
+					if (what.str(2) == ")") {
+						handleFoundFile(what.str(1), head);
+						// since we had a closing bracket, do not
+						// investigate further
+						found_file = true;
+					} else
+						// if we have no closing bracket,
+						// try to handle as file nevertheless
+						found_file = handleFoundFile(what.str(1) + what.str(2), head);
+				}
+				// if we do not have a dot, check if the line has
+				// a closing bracket (else, we suspect a line break)
+				else if (what.str(2) != ")") {
+					first = what[0].second;
+					found_file = false;
+				} else {
+					// we have a closing bracket, so the content
+					// is not a file name.
+					// no need to investigate further
+					// pretend we've been successfully searching
+					first = what[0].second;
+					found_file = true;
+				}
 			}
-		} else if (regex_match(token, sub, reg2))
-			handleFoundFile(sub.str(1), head);
-		else if (regex_match(token, sub, reg3))
-			handleFoundFile(sub.str(1), head);
-		else if (regex_match(token, sub, reg4))
-			handleFoundFile(sub.str(1), head);
-		else if (regex_match(token, sub, reg5))
-			handleFoundFile(sub.str(1), head);
-		else if (regex_match(token, sub, regnomencl))
-			handleFoundFile(sub.str(1), head);
-		else if (regex_match(token, sub, miktexTocReg))
-			handleFoundFile(onlyFilename(changeExtension(file.absFilename(), ".toc")), head);
+		}
+
+		if (!found_file)
+			lastline = token;
+		else
+			lastline = string();
 	}
 
-	// Make sure that the main .tex file is in the dependancy file.
+	// Make sure that the main .tex file is in the dependency file.
 	head.insert(file, true);
 }

[patch] log file parsing reworked

Reply via email to