[Buildbot-commits] buildbot/buildbot/slave interfaces.py,NONE,1.1 bot.py,1.3,1.4 commands.py,1.16,1.17
Brian Warner
warner at users.sourceforge.net
Fri Dec 3 22:54:54 UTC 2004
- Previous message (by thread): [Buildbot-commits] buildbot/buildbot/process base.py,1.43,1.44 builder.py,1.18,1.19 step.py,1.56,1.57
- Next message (by thread): [Buildbot-commits] buildbot/buildbot/status builder.py,1.45,1.46 html.py,1.46,1.47
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/buildbot/buildbot/buildbot/slave
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20004/buildbot/slave
Modified Files:
bot.py commands.py
Added Files:
interfaces.py
Log Message:
Make commands (and builds) interruptible. Improve lost-slave behavior.
Merging in several days of changes from local Arch branch, see ChangeLog for
details about individual files.
Index: bot.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/slave/bot.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- bot.py 30 Aug 2004 22:15:23 -0000 1.3
+++ bot.py 3 Dec 2004 22:54:52 -0000 1.4
@@ -35,21 +35,20 @@
def __init__(self, builder):
self.builder = builder
-class SlaveBuilder(pb.Referenceable):
+class SlaveBuilder(pb.Referenceable, service.Service):
"""This is the local representation of a single Builder: it handles a
single kind of build (like an all-warnings build). It has a name and a
home directory. The rest of its behavior is determined by the master.
"""
-
+
+ stopCommandOnShutdown = True
+
def __init__(self, parent, name, builddir, not_really):
+ #service.Service.__init__(self)
self.name = name
- self.bot = parent
self.builddir = builddir
self.not_really = not_really
- self.basedir = os.path.join(self.bot.basedir, builddir)
- if not os.path.isdir(self.basedir):
- os.mkdir(self.basedir)
self.remote = None
# remote is a ref to the Builder object on the master side, and is
# set when they attach. It really isn't used very much
@@ -74,6 +73,20 @@
def __repr__(self):
return "<SlaveBuilder '%s'>" % self.name
+
+ def setServiceParent(self, parent):
+ service.Service.setServiceParent(self, parent)
+ self.bot = self.parent
+ self.basedir = os.path.join(self.bot.basedir, self.builddir)
+ if not os.path.isdir(self.basedir):
+ os.mkdir(self.basedir)
+
+ def stopService(self):
+ service.Service.stopService(self)
+ if self.command and self.stopCommandOnShutdown:
+ self.stopCommand()
+ self.command = None
+
def remote_setMaster(self, remote):
self.remote = remote
self.remote.notifyOnDisconnect(self.lostRemote)
@@ -82,14 +95,13 @@
def lostRemote(self, remote):
log.msg("lost remote")
- if self.remote != remote:
- print "WEIRD: lost the wrong remote"
self.remote = None
+
def lostRemoteStep(self, remotestep):
log.msg("lost remote step")
- if self.remoteStep != remotestep:
- print "WEIRD: lost the wrong remote step"
self.remoteStep = None
+ if self.command and self.stopCommandOnShutdown:
+ self.stopCommand()
# the following are Commands that can be invoked by the master-side
# Builder
@@ -105,7 +117,7 @@
if self.command:
log.msg("leftover command, dropping it")
- #self.stopCommand()
+ self.stopCommand()
self.command = None
try:
@@ -120,25 +132,49 @@
self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
self.updateNum = 0
self.complete = None
+ self.command.running = True
d = defer.maybeDeferred(self.command.start)
d.addCallbacks(self.commandComplete, self.commandFailed)
return None
- # the following are invoked by the Commands we spawn
+ def remote_interruptCommand(self, stepId, why):
+ """Halt the current step."""
+ log.msg("asked to interrupt current command: %s" % why)
+ if not self.command:
+ # TODO: just log it, a race could result in their interrupting a
+ # command that wasn't actually running
+ log.msg(" .. but none was running")
+ return
+ self.command.interrupt()
+
+ def stopCommand(self):
+ if not self.command:
+ return
+ self.command.running = False
+ if not self.command.interrupted:
+ self.command.interrupt()
+
+
+ # these two are fired by the Deferred attached to each Command
def commandComplete(self, dummy):
+ if not self.running:
+ return
self.sendComplete()
def commandFailed(self, why):
+ if not self.running:
+ return
log.msg("commandFailed")
log.err(why)
self.sendComplete(why)
-
- # these are utility routines used by sendStatus and commandComplete
+ # sendUpdate is invoked by the Commands we spawn
def sendUpdate(self, data=None):
"""This sends the status update to the master-side BuildStep object,
giving it a sequence number in the process. It adds the update to
a queue, and asks the master to acknowledge the update so it can be
removed from that queue."""
+ if not self.running:
+ return
self.updateNum += 1
update = [data, self.updateNum]
#log.msg("sendUpdate", update)
@@ -149,6 +185,8 @@
d.addCallback(self.ackUpdate)
d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")
+ # these are utility routines used by sendStatus and commandComplete
+
def dummy(self, value):
pass
@@ -218,10 +256,6 @@
self.sendAllUpdates()
self.sendAllCompletes()
- def stopCommand(self):
- if self.command:
- self.command.interrupt()
- self.command = None
def finishCommand(self):
log.msg("SlaveBuilder.finishCommand", self.command)
self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
@@ -233,10 +267,12 @@
reactor.stop()
-class Bot(pb.Referenceable):
+class Bot(pb.Referenceable, service.MultiService):
usePTY = None
+ name = "bot"
def __init__(self, basedir, usePTY, not_really=0):
+ service.MultiService.__init__(self)
self.basedir = basedir
self.usePTY = usePTY
self.not_really = not_really
@@ -263,11 +299,13 @@
else:
b = SlaveBuilder(self, name, builddir, self.not_really)
b.usePTY = self.usePTY
+ b.setServiceParent(self)
self.builders[name] = b
retval[name] = b
for name in self.builders.keys():
if not name in map(lambda a: a[0], wanted):
log.msg("removing old builder %s" % name)
+ self.builder[name].disownServiceParent()
del(self.builders[name])
return retval
@@ -370,18 +408,33 @@
self.keepaliveTimer = None
-class BuildSlave(internet.TCPClient):
+class BuildSlave(service.MultiService):
+ botClass = Bot
+
def __init__(self, host, port, name, passwd, basedir, keepalive,
usePTY):
- bot = Bot(basedir, usePTY)
+ service.MultiService.__init__(self)
+ bot = self.botClass(basedir, usePTY)
+ bot.setServiceParent(self)
bf = self.bf = BotFactory(keepalive)
bf.startLogin(credentials.UsernamePassword(name, passwd), client=bot)
- internet.TCPClient.__init__(self, host, port, bf)
+ self.connection = c = internet.TCPClient(host, port, bf)
+ c.setServiceParent(self)
+
+ def waitUntilDisconnected(self):
+ # utility method for testing. Returns a Deferred that will fire when
+ # we lose the connection to the master.
+ if not self.bf.perspective:
+ return defer.succeed(None)
+ d = defer.Deferred()
+ self.bf.perspective.notifyOnDisconnect(lambda res: d.callback(None))
+ return d
def stopService(self):
self.bf.continueTrying = 0
- internet.TCPClient.stopService(self)
- return self._connection.disconnect()
+ service.MultiService.stopService(self)
+ # now kill the TCP connection
+ self.connection._connection.disconnect()
class Options(usage.Options):
synopsis = "Usage: mktap buildbot slave --name <name> --passwd <passwd> [options]"
Index: commands.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/slave/commands.py,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -d -r1.16 -r1.17
--- commands.py 28 Oct 2004 07:27:08 -0000 1.16
+++ commands.py 3 Dec 2004 22:54:52 -0000 1.17
@@ -6,10 +6,14 @@
from twisted.internet import reactor, defer
from twisted.python import log, failure, runtime
+from buildbot.slave.interfaces import ISlaveCommand
from buildbot.slave.registry import registerSlaveCommand
cvs_ver = '$Revision$'[1+len("Revision: "):-2]
+# version history:
+# >=1.17: commands are interruptable
+
class CommandInterrupted(Exception):
pass
class TimeoutError(Exception):
@@ -206,11 +210,13 @@
def doTimeout(self):
msg = "command timed out: %d seconds without output" % self.timeout
+ self.kill(msg)
+
+ def kill(self, msg):
msg += ", killing pid %d" % self.process.pid
log.msg(msg)
self.sendStatus({'header': "\n" + msg + "\n"})
- # TODO: nicer way is: SIGTERM, wait, SIGKILL, wait, freak
hit = 0
if runtime.platformType == "posix":
try:
@@ -226,14 +232,15 @@
# probably no-such-process, maybe because there is no process
# group
pass
- try:
- log.msg("trying process.signalProcess('KILL')")
- self.process.signalProcess('KILL')
- log.msg(" successful")
- hit = 1
- except OSError:
- # could be no-such-process, because they finished very recently
- pass
+ if not hit:
+ try:
+ log.msg("trying process.signalProcess('KILL')")
+ self.process.signalProcess('KILL')
+ log.msg(" successful")
+ hit = 1
+ except OSError:
+ # could be no-such-process, because they finished very recently
+ pass
if not hit:
log.msg("signalProcess/os.kill failed both times")
# finished ought to be called momentarily
@@ -248,21 +255,17 @@
# be raised as pp tries to send status through .command
self.commandFailed(TimeoutError("SIGKILL failed to kill process"))
- def interrupt(self):
- log.msg("interrupting process", self.process)
- self.process.signalProcess('KILL')
- # some stdout/stderr may be lost, along with the exit code
-
class Command:
+ __implements__ = ISlaveCommand,
- """This class defines one command that can be invoked by the build
- master. The command is executed on the slave side, and always sends back
- a completion message when it finishes. It may also send intermediate
- status as it runs (by calling builder.sendStatus). Some commands can be
- interrupted (either by the build master or a local SIGINT), in which
- case the status message indicates the step failed to complete because of
- an interruption.
+ """This class defines one command that can be invoked by the build master.
+ The command is executed on the slave side, and always sends back a
+ completion message when it finishes. It may also send intermediate status
+ as it runs (by calling builder.sendStatus). Some commands can be
+ interrupted (either by the build master or a local timeout), in which
+ case the step is expected to complete normally with a status message that
+ indicates an error occurred.
These commands are used by BuildSteps on the master side. Each kind of
BuildStep uses a single Command. The slave must implement all the
@@ -274,46 +277,75 @@
where 'builder' is the parent SlaveBuilder object, and 'args' is a
dict that is interpreted per-command.
+ The setup(args) method is available for setup, and is run from __init__.
+
The Command is started with start(). This method must be implemented in a
subclass, and it should return a Deferred. When your step is done, you
should fire the Deferred (the results are not used). If the command is
- interrupted, it should errback with a CommandInterrupted failure.
+ interrupted, it should fire the Deferred anyway.
- The status messages all carry a dict, which is interpreted by the
- master-side BuildStep however it likes. The completion message only
- specifies whether the command was interrupted or not. If the Command
- needs to return an exit code of some sort, that should be sent as a
- regular status message before the completion message is sent. Once
- builder.commandComplete has been run, no more status messages may be
- sent."""
+ While the command runs. it may send status messages back to the
+ buildmaster by calling self.sendStatus(statusdict). The statusdict is
+ interpreted by the master-side BuildStep however it likes.
+
+ A separate completion message is sent when the deferred fires, which
+ indicates that the Command has finished, but does not carry any status
+ data. If the Command needs to return an exit code of some sort, that
+ should be sent as a regular status message before the deferred is fired .
+ Once builder.commandComplete has been run, no more status messages may be
+ sent.
+
+ If interrupt() is called, the Command should attempt to shut down as
+ quickly as possible. Child processes should be killed, new ones should
+ not be started. The Command should send some kind of error status update,
+ then complete as usual by firing the Deferred.
+
+ .interrupted should be set by interrupt(), and can be tested to avoid
+ sending multiple error status messages.
+
+ If .running is False, the bot is shutting down (or has otherwise lost the
+ connection to the master), and should not send any status messages. This
+ is taken care of in Command.sendStatus .
+
+ """
# builder methods:
# sendStatus(dict) (zero or more)
# commandComplete() or commandInterrupted() (one, at end)
debug = False
+ interrupted = False
+ running = False # set by Builder, cleared on shutdown or when the
+ # Deferred fires
def __init__(self, builder, stepId, args):
self.builder = builder
self.stepId = stepId # just for logging
self.args = args
+ self.setup(args)
+ def setup(self, args):
+ """Override this in a subclass to extract items from the args dict."""
+ pass
+
def start(self):
# should return a Deferred
raise NotImplementedError, "You must implement this in a subclass"
def sendStatus(self, status):
"""Send a status update to the master."""
- if self.debug: log.msg("sendStatus", status)
+ if self.debug:
+ log.msg("sendStatus", status)
+ if not self.running:
+ log.msg("would sendStatus but not .running")
+ return
self.builder.sendUpdate(status)
- def NOTinterrupt(self):
- """Stop the command. You must implement this in a subclass, then
- call this parent method when it is done. After this is called, no
- further status messages may be sent."""
- self.builder = None # make sure we stop sending messages
- self.interrupted = 1
- self.deferred.errback(failure.Failure(CommandInterrupted()))
+ def interrupt(self):
+ """Override this in a subclass to allow commands to be interrupted.
+ May be called multiple times, use self.interrupted=True if this
+ matters."""
+ pass
def _abandonOnFailure(self, rc):
if type(rc) is not int:
@@ -375,6 +407,11 @@
d = self.command.start()
return d
+ def interrupt(self):
+ self.interrupted = True
+ self.command.kill("command interrupted")
+
+
registerSlaveCommand("shell", SlaveShellCommand, cvs_ver)
@@ -382,18 +419,27 @@
def start(self):
self.d = defer.Deferred()
log.msg(" starting dummy command [%s]" % self.stepId)
- reactor.callLater(1, self.doStatus)
+ self.timer = reactor.callLater(1, self.doStatus)
return self.d
+ def interrupt(self):
+ self.timer.cancel()
+ self.timer = None
+ self.interrupted = True
+ self.finished()
+
def doStatus(self):
log.msg(" sending intermediate status")
self.sendStatus({'stdout': 'data'})
timeout = self.args.get('timeout', 5) + 1
- reactor.callLater(timeout - 1, self.finished)
+ self.timer = reactor.callLater(timeout - 1, self.finished)
def finished(self):
log.msg(" dummy command finished [%s]" % self.stepId)
- self.sendStatus({'rc': 0})
+ if self.interrupted:
+ self.sendStatus({'rc': 1})
+ else:
+ self.sendStatus({'rc': 0})
self.d.callback(0)
registerSlaveCommand("dummy", DummyCommand, cvs_ver)
@@ -426,21 +472,19 @@
"""
- def __init__(self, builder, stepId, args):
- Command.__init__(self, builder, stepId, args)
+ def setup(self, args):
self.workdir = args['workdir']
self.mode = args.get('mode', "update")
self.revision = args.get('revision')
self.patch = args.get('patch')
self.timeout = args.get('timeout', 120)
- self.setup(args)
-
- def setup(self, args):
- """Override this in the VC-specific subclass to extract more args"""
- pass
+ # VC-specific subclasses should override this to extract more args.
+ # Make sure to upcall!
def start(self):
self.sendStatus({'header': "starting " + self.header + "\n"})
+ self.command = None
+
# self.srcdir is where the VC system should put the sources
if self.mode == "copy":
self.srcdir = "source" # hardwired directory name, sorry
@@ -466,6 +510,11 @@
d.addCallbacks(self._sendRC, self._checkAbandoned)
return d
+ def interrupt(self):
+ self.interrupted = True
+ if self.command:
+ self.command.kill("command interrupted")
+
def doVC(self, res):
if self.sourcedirIsUpdateable():
d = self.doVCUpdate()
@@ -484,6 +533,8 @@
def maybeDoVCFallback(self, rc):
if type(rc) is int and rc == 0:
return rc
+ if self.interrupted:
+ raise AbandonChain(1)
msg = "update failed, clobbering and trying again"
self.sendStatus({'header': msg + "\n"})
log.msg(msg)
@@ -577,6 +628,7 @@
header = "cvs operation"
def setup(self, args):
+ SourceBase.setup(self, args)
self.cvsroot = args['cvsroot']
self.cvsmodule = args['cvsmodule']
self.global_options = args.get('global_options', [])
@@ -651,6 +703,7 @@
header = "svn operation"
def setup(self, args):
+ SourceBase.setup(self, args)
self.svnurl = args['svnurl']
def sourcedirIsUpdateable(self):
@@ -695,6 +748,7 @@
header = "darcs operation"
def setup(self, args):
+ SourceBase.setup(self, args)
self.repourl = args['repourl']
def sourcedirIsUpdateable(self):
@@ -741,6 +795,7 @@
buildconfig = None
def setup(self, args):
+ SourceBase.setup(self, args)
self.url = args['url']
self.version = args['version']
@@ -825,6 +880,7 @@
header = "p4 sync"
def setup(self, args):
+ SourceBase.setup(self, args)
self.p4port = args['p4port']
def sourcedirIsUpdateable(self):
--- NEW FILE: interfaces.py ---
#! /usr/bin/python
from twisted.python.components import Interface
class ISlaveCommand(Interface):
"""This interface is implemented by all of the buildslave's Command
subclasses. It specifies how the buildslave can start, interrupt, and
query the various Commands running on behalf of the buildmaster."""
def __init__(builder, stepId, args):
"""Create the Command. 'builder' is a reference to the parent
buildbot.bot.SlaveBuilder instance, which will be used to send status
updates (by calling builder.sendStatus). 'stepId' is a random string
which helps correlate slave logs with the master. 'args' is a dict of
arguments that comes from the master-side BuildStep, with contents
that are specific to the individual Command subclass.
This method is not intended to be subclassed."""
def setup(args):
"""This method is provided for subclasses to override, to extract
parameters from the 'args' dictionary. The default implemention does
nothing. It will be called from __init__"""
def start():
"""Begin the command, and return a Deferred.
While the command runs, it should send status updates to the
master-side BuildStep by calling self.sendStatus(status). The
'status' argument is typically a dict with keys like 'stdout',
'stderr', and 'rc'.
When the step completes, it should fire the Deferred (the results are
not used). If an exception occurs during execution, it may also
errback the deferred, however any reasonable errors should be trapped
and indicated with a non-zero 'rc' status rather than raising an
exception. Exceptions should indicate problems within the buildbot
itself, not problems in the project being tested.
"""
def interrupt():
"""This is called to tell the Command that the build is being stopped
and therefore the command should be terminated as quickly as
possible. The command may continue to send status updates, up to and
including an 'rc' end-of-command update (which should indicate an
error condition). The Command's deferred should still be fired when
the command has finally completed.
If the build is being stopped because the slave it shutting down or
because the connection to the buildmaster has been lost, the status
updates will simply be discarded. The Command does not need to be
aware of this.
Child shell processes should be killed. Simple ShellCommand classes
can just insert a header line indicating that the process will be
killed, then os.kill() the child."""
- Previous message (by thread): [Buildbot-commits] buildbot/buildbot/process base.py,1.43,1.44 builder.py,1.18,1.19 step.py,1.56,1.57
- Next message (by thread): [Buildbot-commits] buildbot/buildbot/status builder.py,1.45,1.46 html.py,1.46,1.47
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Commits
mailing list