I do believe that we've got there. Attached is the final patch with a 
description of the design embedded in forkedcontr.C.

I'll commit this tomorrow to give everybody a fair chance to complain 
loudly.

Many thanks to John Levon for holding my hand through all this.

-- 
Angus
Index: src/ChangeLog
===================================================================
RCS file: /usr/local/lyx/cvsroot/lyx-devel/src/ChangeLog,v
retrieving revision 1.1840
diff -u -p -r1.1840 ChangeLog
--- src/ChangeLog	23 Mar 2004 14:39:40 -0000	1.1840
+++ src/ChangeLog	23 Mar 2004 22:35:25 -0000
@@ -1,5 +1,11 @@
 2004-03-23  Angus Leeming  <[EMAIL PROTECTED]>
 
+	* BufferView_pimpl.C (cursorToggle): use the cursor toggle to
+	deal with any child processes that have finished but are waiting to
+	communicate this fact to the rest of LyX.
+
+2004-03-23  Angus Leeming  <[EMAIL PROTECTED]>
+
 	* ispell.C (LaunchIspell):
 	* lyx_cb.C (AutoSaveBuffer): change the signature of clone to return
 	a boost::shred_ptr rather than a std::auto_ptr.
Index: src/BufferView_pimpl.C
===================================================================
RCS file: /usr/local/lyx/cvsroot/lyx-devel/src/BufferView_pimpl.C,v
retrieving revision 1.524
diff -u -p -r1.524 BufferView_pimpl.C
--- src/BufferView_pimpl.C	19 Mar 2004 16:36:50 -0000	1.524
+++ src/BufferView_pimpl.C	23 Mar 2004 22:35:25 -0000
@@ -60,6 +60,7 @@
 #include "graphics/Previews.h"
 
 #include "support/filetools.h"
+#include "support/forkedcontr.h"
 #include "support/globbing.h"
 #include "support/path_defines.h"
 #include "support/tostr.h"
@@ -72,6 +73,7 @@ using lyx::support::AddPath;
 using lyx::support::bformat;
 using lyx::support::FileFilterList;
 using lyx::support::FileSearch;
+using lyx::support::ForkedcallsController;
 using lyx::support::IsDirWriteable;
 using lyx::support::MakeDisplayPath;
 using lyx::support::strToUnsignedInt;
@@ -584,8 +586,17 @@ void BufferView::Pimpl::update()
 // Callback for cursor timer
 void BufferView::Pimpl::cursorToggle()
 {
-	if (buffer_)
+	if (buffer_) {
 		screen().toggleCursor(*bv_);
+
+		// Use this opportunity to deal with any child processes that
+		// have finished but are waiting to communicate this fact
+		// to the rest of LyX.
+		ForkedcallsController & fcc = ForkedcallsController::get();
+		if (fcc.processesCompleted())
+			fcc.handleCompletedProcesses();
+	}
+
 	cursor_timeout.restart();
 }
 
Index: src/support/ChangeLog
===================================================================
RCS file: /usr/local/lyx/cvsroot/lyx-devel/src/support/ChangeLog,v
retrieving revision 1.241
diff -u -p -r1.241 ChangeLog
--- src/support/ChangeLog	23 Mar 2004 14:39:41 -0000	1.241
+++ src/support/ChangeLog	23 Mar 2004 22:35:27 -0000
@@ -1,5 +1,16 @@
 2004-03-23  Angus Leeming  <[EMAIL PROTECTED]>
 
+	* forkedcontr.[Ch]: get rid of the timer that we use to poll the list
+	of child proccesses and ascertain whether any have died. Instead use
+	the SIGCHLD signal emitted by the system to reap these zombies in the
+	maximally efficient manner. The subsequent emitting of the signal
+	associated with each child process *is* performed within the main
+	lyx event loop, thus ensuring that the code remains safe.
+
+	A detailed description of the design is to be found in forkedcontr.C.
+
+2004-03-23  Angus Leeming  <[EMAIL PROTECTED]>
+
 	* forkedcall.h (ForkedProcess, Forkedcall): change the signature of
 	clone to return a boost::shred_ptr rather than a std::auto_ptr.
 
Index: src/support/forkedcontr.C
===================================================================
RCS file: /usr/local/lyx/cvsroot/lyx-devel/src/support/forkedcontr.C,v
retrieving revision 1.19
diff -u -p -r1.19 forkedcontr.C
--- src/support/forkedcontr.C	23 Mar 2004 14:39:41 -0000	1.19
+++ src/support/forkedcontr.C	23 Mar 2004 22:35:27 -0000
@@ -17,11 +17,9 @@
 #include "forkedcontr.h"
 #include "forkedcall.h"
 #include "lyxfunctional.h"
-#include "debug.h"
 
-#include "frontends/Timeout.h"
+#include "debug.h"
 
-#include <boost/bind.hpp>
 #include <boost/iterator/indirect_iterator.hpp>
 
 #include <cerrno>
@@ -29,14 +27,14 @@
 #include <unistd.h>
 #include <sys/wait.h>
 
-
-using boost::bind;
-
 using std::endl;
 using std::find_if;
+
 using std::string;
+using std::vector;
 
 #ifndef CXX_GLOBAL_CSTD
+using std::signal;
 using std::strerror;
 #endif
 
@@ -44,6 +42,136 @@ using std::strerror;
 namespace lyx {
 namespace support {
 
+/* The forkedcall controller code handles finished child processes in a
+   two-stage process.
+
+   1. It uses the SIGCHLD signal emitted by the system when the child process
+      finishes to reap the resulting zombie. The handler routine also
+      updates an internal list of completed children.
+   2. The signals associated with these completed children are then emitted
+      as part of the main LyX event loop.
+
+   The guiding philosophy is that zombies are a global resource that should
+   be reaped as soon as possible whereas an internal list of dead children
+   is not. Indeed, to emit the signals within the asynchronous handler
+   routine would result in unsafe code.
+
+   The signal handler is guaranteed to be safe even though it may not be 
+   atomic:
+
+   int completed_child_status;
+   sig_atomic_t completed_child_pid;
+
+   extern "C"
+   void child_handler(int)
+   {
+     // Clean up the child process.
+     completed_child_pid = wait(&completed_child_status);
+   }
+
+   (See the signals tutorial at http://tinyurl.com/3h82w.) 
+
+   It's safe because:
+   1. wait(2) is guaranteed to be async-safe.
+   2. child_handler handles only SIGCHLD signals so all subsequent 
+      SIGCHLD signals are blocked from entering the handler until the 
+      existing signal is processed.
+
+   This handler performs 'half' of the necessary clean up after a 
+   completed child process. It prevents us leaving a stream of zombies 
+   behind but does not go on to tell the main LyX program to finish the 
+   clean-up by emitting the stored signal. That would most definitely 
+   not be safe.
+
+   The only problem with the above is that the global stores 
+   completed_child_status, completed_child_pid may be overwritten before 
+   the clean-up is completed in the main loop.
+
+   However, the code in child_handler can be extended to fill an array of 
+   completed processes. Everything remains safe so long as no 'unsafe' 
+   functions are called. (See the list of async-safe functions at 
+   http://tinyurl.com/3h82w.)
+
+   struct child_data {
+     pid_t pid;
+     int status;
+   };
+
+   // This variable may need to be resized in the main program 
+   // as and when a new process is forked. This resizing must be
+   // protected with sigprocmask
+   std::vector<child_data> reaped_children;
+   sig_atomic_t current_child = -1;
+
+   extern "C"
+   void child_handler(int)
+   {
+     child_data & store = reaped_children[++current_child];
+     // Clean up the child process.
+     store.pid = wait(&store.status);
+   }
+
+   That is, we build up a list of completed children in anticipation of 
+   the main loop then looping over this list and invoking any associated 
+   callbacks etc. The nice thing is that the main loop needs only to 
+   check the value of 'current_child':
+
+   if (current_child != -1)
+     handleCompletedProcesses();
+   
+   handleCompletedProcesses now loops over only those child processes 
+   that have completed (ie, those stored in reaped_children). It blocks 
+   any subsequent SIGCHLD signal whilst it does so:
+
+   // Used to block SIGCHLD signals.
+   sigset_t newMask, oldMask;
+
+   ForkedcallsController::ForkedcallsController()
+   {
+     reaped_children.resize(50);
+     signal(SIGCHLD, child_handler);
+
+     sigemptyset(&oldMask);
+     sigemptyset(&newMask);
+     sigaddset(&newMask, SIGCHLD);
+   }
+
+   void ForkedcallsController::handleCompletedProcesses()
+   {
+     if (current_child == -1)
+       return;
+
+     // Block the SIGCHLD signal.
+     sigprocmask(SIG_BLOCK, &newMask, &oldMask);
+
+     for (int i = 0; i != 1+current_child; ++i) {
+       child_data & store = reaped_children[i];
+       // Go on to handle the child process
+       ...
+     }
+
+     // Unblock the SIGCHLD signal and restore the old mask.
+     sigprocmask(SIG_SETMASK, &oldMask, 0);
+   }
+
+   Voilą! An efficient, elegant and *safe* mechanism to handle child processes.
+*/
+
+namespace {
+
+extern "C"
+void child_handler(int)
+{
+	ForkedcallsController & fcc = ForkedcallsController::get();
+	ForkedcallsController::Data & store =
+		fcc.reaped_children[++fcc.current_child];
+	// Clean up the child process.
+	store.pid = wait(&store.status);
+}
+
+} // namespace anon
+
+
 // Ensure, that only one controller exists inside process
 ForkedcallsController & ForkedcallsController::get()
 {
@@ -53,11 +181,13 @@ ForkedcallsController & ForkedcallsContr
 
 
 ForkedcallsController::ForkedcallsController()
+	: reaped_children(50), current_child(-1)
 {
-	timeout_ = new Timeout(100, Timeout::ONETIME);
+	signal(SIGCHLD, child_handler);
 
-	timeout_->timeout
-		.connect(bind(&ForkedcallsController::timer, this));
+	sigemptyset(&oldMask);
+	sigemptyset(&newMask);
+	sigaddset(&newMask, SIGCHLD);
 }
 
 
@@ -66,107 +196,113 @@ ForkedcallsController::ForkedcallsContro
 // I want to print or something.
 ForkedcallsController::~ForkedcallsController()
 {
-	delete timeout_;
+	signal(SIGCHLD, SIG_DFL);
 }
 
 
 void ForkedcallsController::addCall(ForkedProcess const & newcall)
 {
-	if (!timeout_->running())
-		timeout_->start();
-
 	forkedCalls.push_back(newcall.clone());
+
+	if (forkedCalls.size() > reaped_children.size()) {
+		// Block the SIGCHLD signal.
+		sigprocmask(SIG_BLOCK, &newMask, &oldMask);
+
+		reaped_children.resize(2*reaped_children.size());
+
+		// Unblock the SIGCHLD signal and restore the old mask.
+		sigprocmask(SIG_SETMASK, &oldMask, 0);
+	}
+}
+
+
+ForkedcallsController::iterator ForkedcallsController::find_pid(pid_t pid)
+{
+	typedef boost::indirect_iterator<ListType::iterator> iterator;
+
+	iterator begin = boost::make_indirect_iterator(forkedCalls.begin());
+	iterator end   = boost::make_indirect_iterator(forkedCalls.end());
+	iterator it = find_if(begin, end,
+			      lyx::compare_memfun(&Forkedcall::pid, pid));
+
+	return it.base();
+}
+
+
+// Kill the process prematurely and remove it from the list
+// within tolerance secs
+void ForkedcallsController::kill(pid_t pid, int tolerance)
+{
+	ListType::iterator it = find_pid(pid);
+	if (it == forkedCalls.end())
+		return;
+
+	(*it)->kill(tolerance);
+	forkedCalls.erase(it);
 }
 
 
 // Timer-call
 // Check the list and, if there is a stopped child, emit the signal.
-void ForkedcallsController::timer()
+void ForkedcallsController::handleCompletedProcesses()
 {
-	ListType::iterator it  = forkedCalls.begin();
-	ListType::iterator end = forkedCalls.end();
-	while (it != end) {
-		ForkedProcess * actCall = it->get();
-
-		pid_t pid = actCall->pid();
-		int stat_loc;
-		pid_t const waitrpid = waitpid(pid, &stat_loc, WNOHANG);
-		bool remove_it = false;
+	if (current_child == -1)
+		return;
+
+	// Block the SIGCHLD signal.
+	sigprocmask(SIG_BLOCK, &newMask, &oldMask);
 
-		if (waitrpid == -1) {
+	for (int i = 0; i != 1+current_child; ++i) {
+		Data & store = reaped_children[i];
+
+		if (store.pid == -1) {
 			lyxerr << "LyX: Error waiting for child: "
 			       << strerror(errno) << endl;
+			continue;
+		}
 
-			// Child died, so pretend it returned 1
-			actCall->setRetValue(1);
-			remove_it = true;
+		ListType::iterator it = find_pid(store.pid);
+		BOOST_ASSERT(it != forkedCalls.end());
 
-		} else if (waitrpid == 0) {
-			// Still running. Move on to the next child.
+		ForkedProcess & child = *it->get();
+		bool remove_it = false;
 
-		} else if (WIFEXITED(stat_loc)) {
+		if (WIFEXITED(store.status)) {
 			// Ok, the return value goes into retval.
-			actCall->setRetValue(WEXITSTATUS(stat_loc));
+			child.setRetValue(WEXITSTATUS(store.status));
 			remove_it = true;
 
-		} else if (WIFSIGNALED(stat_loc)) {
+		} else if (WIFSIGNALED(store.status)) {
 			// Child died, so pretend it returned 1
-			actCall->setRetValue(1);
+			child.setRetValue(1);
 			remove_it = true;
 
-		} else if (WIFSTOPPED(stat_loc)) {
-			lyxerr << "LyX: Child (pid: " << pid
+		} else if (WIFSTOPPED(store.status)) {
+			lyxerr << "LyX: Child (pid: " << store.pid
 			       << ") stopped on signal "
-			       << WSTOPSIG(stat_loc)
+			       << WSTOPSIG(store.status)
 			       << ". Waiting for child to finish." << endl;
 
 		} else {
 			lyxerr << "LyX: Something rotten happened while "
-				"waiting for child " << pid << endl;
+				"waiting for child " << store.pid << endl;
 
 			// Child died, so pretend it returned 1
-			actCall->setRetValue(1);
+			child.setRetValue(1);
 			remove_it = true;
 		}
 
 		if (remove_it) {
-			actCall->emitSignal();
+			child.emitSignal();
 			forkedCalls.erase(it);
-
-			/* start all over: emiting the signal can result
-			 * in changing the list (Ab)
-			 */
-			it = forkedCalls.begin();
-		} else {
-			++it;
 		}
 	}
 
-	if (!forkedCalls.empty() && !timeout_->running()) {
-		timeout_->start();
-	}
-}
-
-
-// Kill the process prematurely and remove it from the list
-// within tolerance secs
-void ForkedcallsController::kill(pid_t pid, int tolerance)
-{
-	typedef boost::indirect_iterator<ListType::iterator> iterator;
-
-	iterator begin = boost::make_indirect_iterator(forkedCalls.begin());
-	iterator end   = boost::make_indirect_iterator(forkedCalls.end());
-	iterator it = find_if(begin, end,
-			      lyx::compare_memfun(&Forkedcall::pid, pid));
-
-	if (it == end)
-		return;
-
-	it->kill(tolerance);
-	forkedCalls.erase(it.base());
+	// Reset the counter
+	current_child = -1;
 
-	if (forkedCalls.empty())
-		timeout_->stop();
+	// Unblock the SIGCHLD signal and restore the old mask.
+	sigprocmask(SIG_SETMASK, &oldMask, 0);
 }
 
 } // namespace support
Index: src/support/forkedcontr.h
===================================================================
RCS file: /usr/local/lyx/cvsroot/lyx-devel/src/support/forkedcontr.h,v
retrieving revision 1.15
diff -u -p -r1.15 forkedcontr.h
--- src/support/forkedcontr.h	23 Mar 2004 14:39:41 -0000	1.15
+++ src/support/forkedcontr.h	23 Mar 2004 22:35:27 -0000
@@ -17,10 +17,10 @@
 #define FORKEDCONTR_H
 
 #include <boost/shared_ptr.hpp>
-#include <sys/types.h> // needed for pid_t
+#include <csignal>
+//#include <sys/types.h> // needed for pid_t
 #include <list>
-
-class Timeout;
+#include <vector>
 
 namespace lyx {
 namespace support {
@@ -32,6 +32,15 @@ public:
 	/// Get hold of the only controller that can exist inside the process.
 	static ForkedcallsController & get();
 
+	/// Are there any completed child processes to be cleaned-up after?
+	bool processesCompleted() const { return current_child != -1; }
+
+	/** Those child processes that are found to have finished are removed
+	 *  from the list and their callback function is passed the final
+	 *  return state.
+ 	 */
+	void handleCompletedProcesses();
+
 	/// Add a new child process to the list of controlled processes.
 	void addCall(ForkedProcess const &);
 
@@ -41,28 +50,36 @@ public:
 	 */
 	void kill(pid_t, int tolerance = 5);
 
+	struct Data {
+		Data() : pid(0), status(0) {}
+		pid_t pid;
+		int status;
+	};
+
+	/** These data are used by the SIGCHLD handler to populate a list
+	 *  of child processes that have completed and been reaped.
+	 *  The associated signals are then emitted within the main LyX
+	 *  event loop.
+	 */
+	std::vector<Data> reaped_children;
+	sig_atomic_t current_child;
+
 private:
 	ForkedcallsController();
 	ForkedcallsController(ForkedcallsController const &);
 	~ForkedcallsController();
 
-	/** This method is connected to the timer. Every XX ms it is called
-	 *  so that we can check on the status of the children. Those that
-	 *  are found to have finished are removed from the list and their
-	 *  callback function is passed the final return state.
-	 */
-	void timer();
-
-	/// The child processes
 	typedef boost::shared_ptr<ForkedProcess> ForkedProcessPtr;
 	typedef std::list<ForkedProcessPtr> ListType;
-	///
+	typedef ListType::iterator iterator;
+
+	iterator find_pid(pid_t);
+
+	/// The child processes
 	ListType forkedCalls;
 
-	/** The timer. Enables us to check the status of the children
-	 *  every XX ms and to invoke a callback on completion.
-	 */
-	Timeout * timeout_;
+	/// Used to block SIGCHLD signals.
+	sigset_t newMask, oldMask;
 };
 
 } // namespace support

Reply via email to