#!/usr/bin/python
#+
# This script ensures there is only one instance of a particular command
# running at a time. It may be useful, for example, for potentially
# lengthy cron tasks, to ensure that a new task is not started if the
# previous one has not finished yet.
#
# Invoke this script from the command line as follows:
#
#     OneAtATime [opts] cmd
#
# where cmd is the Shell command line to execute. Valid options are:
#
#     --id=s
#         use the string s as the unique identifier for this command
#         (should not contain "/" or nul characters). Any other execution
#         of OneAtATime specifying the same id will not proceed as long as
#         this instance is still executing.
#         If omitted, the id is computed as a hash on the cmd string.
#
#     --timeout=n
#         If an existing instance has been executing for at least n
#         seconds, assume it's taking too long and try to kill it
#         before proceeding. If omitted, the timeout is infinity.
#
#     --verbose
#         increases the verbosity level by 1 each time it is specified.
#         The initial default verbosity level is 0.
#
#     --wait
#         If specified, then this invocation should sleep until any
#         previous instance is finished (or until the timeout elapses,
#         if specified). If not specified, then this invocation will
#         simply exit if an existing instance is already running.
#         Note that, if more than one invocation is waiting on a
#         previous instance to complete, there is no guarantee in
#         what order they'll execute.
#
# Start of development 2006 March 23 by Lawrence D'Oliveiro
# <[EMAIL PROTECTED]>.
# First working version 2006 March 24.
# Fix hang if previous process dies without freeing instance lock and
# --timeout was not specified 2006 March 24.
# Ensure lock-breaking recovery is still invoked even without --wait,
# include effective user ID in instance lock name for uniqueness
# 2006 March 25.
# Last modified 2006 March 27.
#-
import sys
import os
import time
import errno
import signal
import getopt
import hashlib

# The code below sticks to constructs that are valid on both Python 2.6+
# and Python 3 ("except ... as", hashlib, open(), no print statement).

SecondaryLockTimeout = 10
  # seconds after which somebody else's secondary lock is considered
  # stale and gets broken
KillGracePeriod = 5.0
  # seconds to wait after SIGTERM before resorting to SIGKILL. Note this
  # must be less than SecondaryLockTimeout, otherwise another invocation
  # could take the secondary lock away from me while I'm waiting
ChildPollInterval = 5
  # seconds between checks on the child when --timeout is in effect

SignalName = {}
for Name in dir(signal) :
    # Collect symbolic names for all signals. The "SIG_" names (SIG_DFL,
    # SIG_IGN, SIG_BLOCK, ...) are handler/mask constants, not signals,
    # and their small integer values would otherwise replace real
    # entries such as SIGHUP. dir() returns names in sorted order and
    # setdefault keeps the first one seen, so where a number has several
    # names the alphabetically first is reported (SIGCHLD, not SIGCLD).
    Value = getattr(signal, Name)
    if (
            Name.startswith("SIG")
        and
            not Name.startswith("SIG_")
        and
            isinstance(Value, int)
    ) :
        SignalName.setdefault(int(Value), Name)
    #end if
#end for

Verbosity = 0
  # current verbosity level, raised by one for each --verbose option

def Debug(Level, Msg) :
    """Write Msg as a line on standard output, provided the current
    verbosity level is at least Level."""
    if Verbosity >= Level :
        sys.stdout.write("%s\n" % Msg)
    #end if
#end Debug

def DefaultID(Cmd) :
    """Return the default instance ID for the command line Cmd: the first
    16 hex digits of the SHA-1 hash of its bytes."""
    if not isinstance(Cmd, bytes) :
        # Python 3 hands over argv as text; turn it back into the bytes
        # that were actually passed on the command line before hashing
        Cmd = os.fsencode(Cmd)
    #end if
    return hashlib.sha1(Cmd).hexdigest()[:16]
      # less than 40-char file name should be OK
#end DefaultID

def ProcessExists(Pid) :
    """Return True if a process with ID Pid exists, False if it does not.
    Any error other than "no such process" is propagated."""
    try :
        os.kill(Pid, 0) # signal 0 only performs the existence check
    except OSError as Err :
        if Err.errno != errno.ESRCH :
            # Not bothering to recover from privilege failures
            raise
        #end if
        return False
    #end try
    return True
#end ProcessExists

def UnlinkIfPresent(FileName) :
    """Remove FileName, not treating it as an error if it is already
    gone. Any other failure is propagated."""
    try :
        os.unlink(FileName)
    except OSError as Err :
        if Err.errno != errno.ENOENT :
            # Not bothering to recover from privilege failures
            raise
        #end if
    #end try
#end UnlinkIfPresent

def GrabSecondaryLock(PidFileName, InstanceLock2Name) :
    """Block until the secondary lock InstanceLock2Name has been created
    as a symlink to PidFileName. A lock held by somebody else for more
    than SecondaryLockTimeout seconds is broken."""
    while True :
        try :
            os.symlink(PidFileName, InstanceLock2Name)
              # guaranteed atomic operation that will fail if
              # destination name already exists. Note use of
              # symlink rather than link because my pid file
              # might be older than secondary lock timeout
              # if I've been waiting a while.
            return
        except OSError as Err :
            if Err.errno != errno.EEXIST :
                # Not bothering to recover from privilege failures
                raise
            #end if
        #end try
        # Somebody else has the lock, check it looks reasonable
        try :
            LastMod = os.lstat(InstanceLock2Name).st_mtime
              # note use of lstat to find out how long symlink has
              # been present, not age of pid file
        except OSError as Err :
            if Err.errno != errno.ENOENT :
                # Not bothering to recover from privilege failures
                raise
            #end if
            LastMod = None
        #end try
        if LastMod is None :
            pass # disappeared while I was looking at it, retry at once
        elif LastMod + SecondaryLockTimeout < time.time() :
            Debug(0, "Breaking secondary lock")
            UnlinkIfPresent(InstanceLock2Name)
        else :
            # lock looks valid, wait awhile and try again
            time.sleep(1)
        #end if
    #end while
#end GrabSecondaryLock

def ReadLockOwner(InstanceLockName) :
    """Return the process ID recorded in the instance lock file, or None
    if the file no longer exists."""
    try :
        PidFile = open(InstanceLockName, "r")
    except (IOError, OSError) as Err :
        # On Python 2 a failed open raises IOError, not OSError. The
        # owner removes its lock without holding the secondary lock, so
        # the file really can vanish between the stat and this open.
        if Err.errno != errno.ENOENT :
            # Not bothering to recover from privilege failures
            raise
        #end if
        return None
    #end try
    try :
        PidLine = PidFile.readline()
    finally :
        PidFile.close()
    #end try
    return int(PidLine.replace("\n", "").replace(" ", ""))
      # not bothering to guard against bad pidfile contents
#end ReadLockOwner

def KillProcess(Pid) :
    """Terminate process Pid: send SIGTERM, allow it KillGracePeriod
    seconds to go away, then send SIGKILL. A process that has already
    gone is not an error."""
    try :
        os.kill(Pid, signal.SIGTERM) # give it a chance to clean up
        StartWait = time.time()
        while ProcessExists(Pid) :
            # wait up to a certain amount of time for process to
            # terminate by itself
            if time.time() - StartWait > KillGracePeriod :
                os.kill(Pid, signal.SIGKILL) # out of patience
                break # it should be gone
            #end if
            time.sleep(1)
        #end while
    except OSError as Err :
        if Err.errno != errno.ESRCH :
            # Not bothering to recover from privilege failures
            raise
        #end if
        # else OK, it's gone
    #end try
#end KillProcess

def RunCommand(Cmd, Timeout) :
    """Run Cmd via "/bin/bash -c" and return its wait status as delivered
    by os.waitpid. If Timeout is not None, the child is polled every
    ChildPollInterval seconds and sent SIGKILL once more than Timeout
    seconds have passed since it was started."""
    ChildPid = os.spawnl(os.P_NOWAIT, "/bin/bash", "/bin/bash", "-c", Cmd)
    if Timeout is None :
        return os.waitpid(ChildPid, 0)[1]
    #end if
    StartWait = time.time()
    while True :
        time.sleep(ChildPollInterval)
        (ChildPidAgain, ChildStatus) = os.waitpid(ChildPid, os.WNOHANG)
        if ChildPidAgain != 0 :
            return ChildStatus # child has finished
        #end if
        if time.time() > StartWait + Timeout :
            Debug(0, "Command taking too long, killing")
            os.kill(ChildPid, signal.SIGKILL)
            return os.waitpid(ChildPid, 0)[1]
        #end if
    #end while
#end RunCommand

def ReportChildStatus(ChildStatus) :
    """Report through Debug how the child ended: a nonzero exit status at
    verbosity 1, death from a signal or anything else unexpected at
    verbosity 0. A zero exit status is not reported."""
    if os.WIFEXITED(ChildStatus) :
        if os.WEXITSTATUS(ChildStatus) != 0 :
            Debug(1, "Child exited with status %d" % os.WEXITSTATUS(ChildStatus))
        #end if
    elif os.WIFSIGNALED(ChildStatus) :
        SigValue = os.WTERMSIG(ChildStatus)
        Debug \
          (
            0,
            "Child died from signal %d (%s)"
            %
            (SigValue, SignalName.get(SigValue, "?"))
          )
    else :
        Debug(0, "Child ?terminated? status %d" % ChildStatus)
    #end if
#end ReportChildStatus

#+
# Mainline
#-

def Main() :
    """Parse the command line, obtain the instance lock for the command
    (waiting and/or breaking an existing lock as the options direct),
    run the command and release the lock again."""
    global Verbosity
    (Opts, Args) = getopt.getopt \
      (
        sys.argv[1:],
        "",
        ["id=", "timeout=", "verbose", "wait"]
      )
    if len(Args) != 1 :
        raise getopt.GetoptError("need exactly one arg, the cmd to execute")
    #end if
    Cmd = Args[0]
    ID = None
    Timeout = None
    Wait = False
    for Keyword, Value in Opts :
        if Keyword == "--id" :
            ID = Value
        elif Keyword == "--timeout" :
            Timeout = int(Value)
        elif Keyword == "--verbose" :
            Verbosity += 1
        elif Keyword == "--wait" :
            Wait = True
        #end if
    #end for
    if ID is None :
        ID = DefaultID(Cmd)
    #end if

    PidFileName = "/tmp/OneAtATime-pid-%d" % os.getpid()
    with open(PidFileName, "w") as PidFile :
        PidFile.write("%s\n" % os.getpid())
    #end with
    InstanceLockName = "/tmp/OneAtATime-lock-%d-%s" % (os.geteuid(), ID)
      # presence of this file is used to guard against more than one
      # invocation of a task with the same id at once
    InstanceLock2Name = "/tmp/OneAtaTime-lock2-%d-%s" % (os.geteuid(), ID)
      # presence of this file is used to serialize checking for
      # presence of InstanceLockName and of the process whose PID is
      # contained in that file. It carries the effective user ID just
      # like InstanceLockName, so that users whose instance locks are
      # separate do not contend for (or have to break) each other's
      # secondary lock. Spelling of the name prefix is left as it was.
    GotLock = False
    GotLock2 = False
    LastLastMod = None
    OtherPid = None
    while True :
        # try to get instance lock
        if not GotLock2 :
            GrabSecondaryLock(PidFileName, InstanceLock2Name)
            GotLock2 = True
        #end if
        # At this point, I have the secondary lock, so nobody else will be
        # trying the following operations at the same time.
        try :
            LastMod = os.stat(InstanceLockName).st_mtime
        except OSError as Err :
            if Err.errno != errno.ENOENT :
                # Not bothering to recover from privilege failures
                raise
            #end if
            # no instance lock present, so make my pid file the lock
            os.link(PidFileName, InstanceLockName)
            GotLock = True
        #end try
        if GotLock :
            Debug(2, "Got the lock")
            break
        #end if
        if LastMod != LastLastMod :
            # instance lock file seems to have changed, (re)fetch OtherPid.
            Debug(2, "Getting existing process ID")
            OtherPid = ReadLockOwner(InstanceLockName)
            if OtherPid is not None :
                Debug(1, "Existing process ID is %d" % OtherPid)
            #end if
            LastLastMod = LastMod
        #end if
        if OtherPid is not None and not ProcessExists(OtherPid) :
            OtherPid = None # nope, it's gone
        #end if
        BreakLock = \
            (
                OtherPid is None
            or
                Timeout is not None and time.time() > LastMod + Timeout
            )
        if BreakLock :
            Debug(0, "Breaking lock and killing existing lock owner")
            if OtherPid is not None :
                KillProcess(OtherPid)
                OtherPid = None
            #end if
            # Lock owner killed, now clean up lock file (if owner
            # didn't already clean it up) so I can grab it for myself
            # on the next time round the loop
            UnlinkIfPresent(InstanceLockName)
        else :
            # lock owner is alive and still within its time allowance
            if GotLock2 :
                # mustn't hang on to secondary lock for too long
                os.unlink(InstanceLock2Name)
                GotLock2 = False
            #end if
            if not Wait :
                Debug(1, "Can't get lock, exiting")
                break
            #end if
            time.sleep(1)
        #end if
    #end while
    if GotLock2 :
        os.unlink(InstanceLock2Name)
    #end if
    os.unlink(PidFileName)
      # if I got the lock, InstanceLockName is another link to the same
      # file, so my pid stays on record there

    if GotLock :
        Debug(2, "Spawning \"%s\"" % Cmd)
        ReportChildStatus(RunCommand(Cmd, Timeout))
        os.unlink(InstanceLockName)
    #end if
#end Main

if __name__ == "__main__" :
    Main()
#end if

# --
# http://mail.python.org/mailman/listinfo/python-list