This patch adds mtime checks to speed up the updating of the DepTable. It is quite effective for me (I don't have istreambuf_iterator).
I have thrown my thesis at it, and it works... you guys may wish to test it some more before committing it. I have also added two new functions which I needed to add glossary support. These are haschanged( set<string> const & files ) and remove_file(const string & filename ). I also found and fixed the problem that Lars mentioned. Could someone else please try this on HEAD? I intend to try mmap for the CRC next. It seems that there are already tests for HAS_MMAP in the autoconf system... Ben.
--- lyx-1.1.6fix3-orig/src/DepTable.C Wed Nov 15 14:22:06 2000 +++ lyx-1.1.6fix3/src/DepTable.C Tue Nov 27 18:38:48 2001 @@ -18,8 +18,15 @@ #endif #include "DepTable.h" +#include "debug.h" + #include "support/lyxlib.h" #include "support/filetools.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + #include <fstream> using std::make_pair; @@ -27,6 +34,39 @@ using std::ifstream; using std::endl; +/** \class DepTable + DepTable provides a list of files and information on + whether their contents have changed since the last time + the file was checked. + + This is performed internally using CRC computations and mtime checks. + + Computing the CRC of every file takes 28-33 seconds on my thesis... + (That is because a large number of large .eps files get entered into + the deptable..., and I do not have istreambuf_iterator :-( ) + + Why does this class use CRC checks instead + of using the dates on the files? + + Well the answer to that is that LaTeX re-writes it's .aux files + every time it runs. This will change the modification date/time, + but the file might be exactly the same. You have to + determine if the new one is different from the old one + by doing a CRC or similar. + + 20011126 bstanley If you were to maintain a record of the last modified +date/time + for each file, and if when you come to check it again and update the CRC, + you could avoid re-calculating the CRC if the date/time of the file + has not changed.... This produces a significant time saving, + whilst still providing the same functionality to clients. bstanley + + 20011127 bstanley Applied patch by LGB which implements this date based + shortcut method. Speedup is significant except when the file is first entered + into the deptable. Tested on large thesis with many .eps files, bibliography, + glossary, index, etc. 
+*/ + + void DepTable::insert(string const & fi, bool upd, unsigned long one, @@ -35,31 +75,62 @@ // not quite sure if this is the correct place for MakeAbsPath string f = MakeAbsPath(fi); if (deplist.find(f) == deplist.end()) { + unsigned long mtime = 0; if (upd) { one = two; + lyxerr[Debug::DEPEND] << " CRC..." << flush; two = lyx::sum(f); + lyxerr[Debug::DEPEND] << "done." << endl; + struct stat f_info; + stat(fi.c_str(), &f_info); + mtime = f_info.st_mtime; } - deplist[f] = make_pair(one, two); + dep_info di; + di.first = one; + di.second = two; + di.mtime = mtime; + deplist[f] = di; } } void DepTable::update() { + lyxerr[Debug::DEPEND] << "Computing file checksums..." << endl; + time_t start_time = time(0); for (DepList::iterator itr = deplist.begin(); itr != deplist.end(); ++itr) { + + long const mtime = itr->second.mtime; + struct stat f_info; + stat(itr->first.c_str(), &f_info); + + if (mtime == f_info.st_mtime) { + continue; + } + unsigned long const one = (*itr).second.second; unsigned long const two = lyx::sum((*itr).first); - (*itr).second = make_pair(one, two); + + dep_info di; + di.first = one; + di.second = two; + di.mtime = f_info.st_mtime; + + itr->second = di; + if (lyxerr.debugging(Debug::DEPEND)) { lyxerr << "Update dep: " << (*itr).first << " " - << one << " " << two; + << one << " " << two << " " << f_info.st_mtime; if (one != two) lyxerr << " +"; lyxerr << endl; } } + time_t time_sec = time(0) - start_time; + lyxerr[Debug::DEPEND] << "Finished computing file checksums (" + << time_sec << " sec)." 
<< endl; } @@ -87,6 +158,16 @@ return false; } +bool DepTable::haschanged(std::set<string> const & fs) const +{ + std::set<string>::iterator i; + for( i = fs.begin(); i != fs.end(); ++i ) { + if( haschanged( *i ) ) { + return true; + } + } + return false; +} bool DepTable::extchanged(string const & ext) const { @@ -112,14 +193,32 @@ void DepTable::remove_files_with_extension(string const & suf) { - DepList tmp; - // we want const_iterator (Lgb) - for (DepList::iterator cit = deplist.begin(); - cit != deplist.end(); ++cit) { - if (!suffixIs((*cit).first, suf)) - tmp[(*cit).first] = (*cit).second; + DepList::iterator cit = deplist.begin(); + while ( cit != deplist.end() ) { + if (suffixIs((*cit).first, suf)) { + // Can't erase the current iterator, but we can increment and +then erase. + // deplist is a map so only the erased iterator is invalidated. + DepList::iterator doomed = cit++; + deplist.erase(doomed); + continue; + } + cit++; + } +} + +void DepTable::remove_file(string const & filename) +{ + DepList::iterator cit = deplist.begin(); + while ( cit != deplist.end() ) { + if (OnlyFilename((*cit).first) == filename) { + // Can't erase the current iterator, but we can increment and +then erase. + // deplist is a map so only the erased iterator is invalidated. 
+ DepList::iterator doomed = cit++; + deplist.erase(doomed); + continue; + } + cit++; } - deplist.swap(tmp); } @@ -132,11 +231,13 @@ lyxerr << "Write dep: " << (*cit).first << " " << (*cit).second.first << " " - << (*cit).second.second << endl; + << cit->second.second << " " + << cit->second.mtime << endl; } ofs << (*cit).first << " " << (*cit).second.first << " " - << (*cit).second.second << endl; + << cit->second.second << " " + << cit->second.mtime << endl; } } @@ -147,13 +248,19 @@ string nome; unsigned long one = 0; unsigned long two = 0; - while(ifs >> nome >> one >> two) { + unsigned long mtime = 0; + while(ifs >> nome >> one >> two >> mtime) { if (lyxerr.debugging(Debug::DEPEND)) { lyxerr << "Read dep: " << nome << " " << one << " " - << two << endl; + << two << " " + << mtime << endl; } - deplist[nome] = make_pair(one, two); + dep_info di; + di.first = one; + di.second = two; + di.mtime = mtime; + deplist[nome] = di; } } --- lyx-1.1.6fix3-orig/src/DepTable.h Tue Apr 4 10:19:07 2000 +++ lyx-1.1.6fix3/src/DepTable.h Tue Nov 27 14:31:15 2001 @@ -17,6 +17,7 @@ #include "LString.h" #include <map> +#include <set> #ifdef __GNUG__ #pragma interface @@ -43,16 +44,26 @@ bool sumchange() const; /// return true if fil has changed. bool haschanged(string const & fil) const; + /// return true if any of the files named in the set have changed. + bool haschanged( std::set<string> const & files ) const; /// return true if a file with extension ext has changed. bool extchanged(string const & ext) const; /// bool exist(string const & fil) const; /// void remove_files_with_extension(string const &); + /// + void remove_file(string const &); private: + /// Each element stores file name (absolute path), previous crc and current +crc of file. + struct dep_info { + unsigned long first; + unsigned long second; + long mtime; + }; /// - typedef std::map<string, - std::pair<unsigned long, unsigned long> > DepList; + typedef std::map<string, dep_info> DepList; + /// DepList deplist; };