[Buildbot-commits] buildbot/buildbot interfaces.py,1.20,1.21 master.py,1.54,1.55
Brian Warner
warner at users.sourceforge.net
Fri Dec 3 22:54:53 UTC 2004
Update of /cvsroot/buildbot/buildbot/buildbot
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20004/buildbot
Modified Files:
interfaces.py master.py
Log Message:
Make commands (and builds) interruptible. Improve lost-slave behavior.
Merging in several days of changes from a local Arch branch; see the ChangeLog for
details about individual files.
Index: interfaces.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/interfaces.py,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -d -r1.20 -r1.21
--- interfaces.py 15 Oct 2004 16:59:44 -0000 1.20
+++ interfaces.py 3 Dec 2004 22:54:50 -0000 1.21
@@ -603,8 +603,18 @@
further control the new build, or from which an IBuildStatus object
can be obtained."""
- def ping():
- """Attempt to contact the slave and see if it is still alive."""
+ def getBuild(number):
+ """Attempt to return an IBuildControl object for the given build.
+ Returns None if no such object is available. This will only work for
+ the build that is currently in progress: once the build finishes,
+ there is nothing to control anymore."""
+
+ def ping(timeout=30):
+ """Attempt to contact the slave and see if it is still alive. This
+ returns a Deferred which fires with either True (the slave is still
+ alive) or False (the slave did not respond). As a side effect, adds
+ an event to this builder's column in the waterfall display
+ containing the results of the ping."""
# TODO: this ought to live in ISlaveControl, maybe with disconnect()
# or something. However the event that is emitted is most useful in
# the Builder column, so it kinda fits here too.
@@ -612,3 +622,7 @@
class IBuildControl(Interface):
def getStatus():
"""Return an IBuildStatus object for the Build that I control."""
+ def stopBuild(reason="<no reason given>"):
+ """Halt the build. This has no effect if the build has already
+ finished."""
+
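As a rough illustration of how these new control methods fit together (the helper below is not part of this commit; its name and the way a caller obtains 'builder_control' are assumptions):

from twisted.internet import defer

def interrupt_build(builder_control, build_number):
    # Hypothetical helper: ping the slave, then halt the in-progress
    # build via the IBuilderControl/IBuildControl methods documented above.
    build_control = builder_control.getBuild(build_number)
    if build_control is None:
        # the build already finished; there is nothing left to control
        return defer.succeed(False)

    d = builder_control.ping(timeout=30)
    def _maybe_stop(alive):
        # ping() fires with True (slave responded) or False (it did not)
        if alive:
            build_control.stopBuild(reason="interrupted by an admin")
        return alive
    d.addCallback(_maybe_stop)
    return d
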
Index: master.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/master.py,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -d -r1.54 -r1.55
--- master.py 14 Oct 2004 16:47:33 -0000 1.54
+++ master.py 3 Dec 2004 22:54:50 -0000 1.55
@@ -38,28 +38,30 @@
class BotPerspective(NewCredPerspective):
"""This is the master-side representative for a remote buildbot slave.
- When buildbots connect in, they get a reference to a new instance of
- this class. The BotMaster object is stashed as the .service
- attribute."""
+ There is exactly one for each slave described in the config file (the
+ c['bots'] list). When buildbots connect in (.attach), they get a
+ reference to this instance. The BotMaster object is stashed as the
+ .service attribute."""
slave_commands = None
- def __init__(self, slavename, builders, slave_status):
- self.slavename = slavename
- self.builders = builders
- self.slave_status = slave_status
+ def __init__(self, name):
+ self.slavename = name
+ self.slave_status = SlaveStatus(name)
+ self.builders = [] # list of b.p.builder.Builder instances
+ self.slave = None # a RemoteReference to the Bot, when connected
def addBuilder(self, builder):
"""Called to add a builder after the slave has connected."""
self.builders.append(builder)
- # TODO: resync with slave, to accomodate builders added after
- # attach
- self.sendBuilderList()
+ if self.slave:
+ self.sendBuilderList()
def removeBuilder(self, builder):
self.builders.remove(builder)
- builder.detached()
- self.sendBuilderList()
+ if self.slave:
+ builder.detached()
+ self.sendBuilderList()
def __repr__(self):
return "<BotPerspective '%s', builders: %s>" % \
@@ -67,14 +69,78 @@
string.join(map(lambda b: b.name, self.builders), ','))
def attached(self, mind):
- # this is called shortly after the slave connects. We go through a
- # sequence of calls, gathering information, then tell our Builders
- # that they have a slave to work with.
+ # this is called when the slave connects. It returns a Deferred that
+ # fires with a suitable pb.IPerspective to give to the slave (i.e.
+ # 'self')
+
+ if self.slave:
+ # uh-oh, we've got a duplicate slave. The most likely
+ # explanation is that the slave is behind a slow link, thinks we
+ # went away, and has attempted to reconnect, so we've got two
+ # "connections" from the same slave, but the previous one is
+ # stale. Give the new one precedence.
+ log.msg("duplicate slave %s replacing old one" % self.slavename)
+
+ # just in case we've got two identically-configured slaves,
+ # report the IP addresses of both so someone can resolve the
+ # squabble
+ tport = self.slave.broker.transport
+ log.msg("old slave was connected from", tport.getPeer())
+ log.msg("new slave is from", mind.broker.transport.getPeer())
+ d = self.disconnect()
+ d.addCallback(lambda res: self._attached(mind))
+ return d
+
+ self._attached(mind)
+ return defer.succeed(self)
+
+ def disconnect(self):
+ log.msg("disconnecting old slave %s now" % self.slavename)
+
+ # all kinds of teardown will happen as a result of
+ # loseConnection(), but it happens after a reactor iteration or
+ # two. Hook the actual disconnect so we can know when it is safe
+ # to connect the new slave. We have to wait one additional
+ # iteration (with callLater(0)) to make sure the *other*
+ # notifyOnDisconnect handlers have had a chance to run.
+ d = defer.Deferred()
+
+ self.slave.notifyOnDisconnect(lambda res: # TODO: d=d ?
+ reactor.callLater(0, d.callback, None))
+ tport = self.slave.broker.transport
+ # this is the polite way to request that a socket be closed
+ tport.loseConnection()
+ try:
+ # but really we don't want to wait for the transmit queue to
+ # drain. The remote end is unlikely to ACK the data, so we'd
+ # probably have to wait for a (20-minute) TCP timeout.
+ #tport._closeSocket()
+ # however, doing _closeSocket (whether before or after
+ # loseConnection) somehow prevents the notifyOnDisconnect
+ # handlers from being run. Bummer.
+ tport.offset = 0
+ tport.dataBuffer = ""
+ pass
+ except:
+ # however, these hacks are pretty internal, so don't blow up if
+ # they fail or are unavailable
+ log.msg("failed to accelerate the shutdown process")
+ pass
+ log.msg("waiting for slave to finish disconnecting")
+
+ # When this Deferred fires, we'll be ready to accept the new slave
+ return d
+
+ def _attached(self, mind):
+ # We go through a sequence of calls, gathering information, then
+ # tell our Builders that they have a slave to work with.
self.slave = mind
self.slave.callRemote("print", "attached").addErrback(lambda why: 0)
self.slave_status.connected = True
log.msg("bot attached")
+ # TODO: there is a window here (while we're retrieving slaveinfo)
+ # during which a disconnect or a duplicate-slave will be confusing
d = self.slave.callRemote("getSlaveInfo")
d.addCallback(self.got_info)
d.addErrback(self.infoUnavailable)
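Stripped of the socket-flushing hacks, the teardown in disconnect() above boils down to the following pattern (a standalone sketch; the function name is illustrative, not part of the commit):

from twisted.internet import defer, reactor

def wait_for_disconnect(remote_ref):
    # Fire a Deferred once the PB connection behind 'remote_ref' is gone.
    # The extra callLater(0) hop gives the *other* notifyOnDisconnect
    # handlers a chance to run before the new slave is accepted.
    d = defer.Deferred()
    remote_ref.notifyOnDisconnect(
        lambda dead: reactor.callLater(0, d.callback, None))
    remote_ref.broker.transport.loseConnection()
    return d
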
@@ -149,13 +215,16 @@
for name, remote in blist.items():
for b in self.builders:
if b.name == name:
+ # if we sent the builders list because of a config
+ # change, the Builder might already be attached.
+ # Builder.attached will ignore us if this happens.
b.attached(remote, self.slave_commands)
continue
def _listFailed(self, why):
log.msg("BotPerspective._listFailed")
log.err(why)
- # TODO: hand up on them, without setBuilderList we can't use them
+ # TODO: hang up on them, without setBuilderList we can't use them
def perspective_forceBuild(self, name, who=None):
# slave admins are allowed to force any of their own builds
@@ -176,13 +245,11 @@
pass
def detached(self, mind):
+ self.slave = None
self.slave_status.connected = False
- self.botmaster.detach(self)
for b in self.builders:
b.detached()
- self.builders = []
- log.msg("bot detached")
- # this perspective goes away now
+ log.msg("Botmaster.detached(%s)" % self.slavename)
class BotMaster(service.Service):
@@ -204,8 +271,7 @@
# which is the master-side object that defines and controls a build.
# They are added by calling botmaster.addBuilder() from the startup
# code.
- self.slaves = {}
- self.slaveStatus = {}
+ self.slaves = {} # maps slavename to BotPerspective
self.interlocks = {}
self.statusClientService = None
self.watchers = {}
@@ -220,10 +286,20 @@
def waitUntilBuilderDetached(self, name):
# convenience function for testing
d = defer.Deferred()
- b = self.builders[name]
+ b = self.builders.get(name, None)
+ if not b or not b.remote:
+ return defer.succeed(None)
b.watchers['detach'].append(d)
return d
+ def addSlave(self, slavename):
+ slave = BotPerspective(slavename)
+ self.slaves[slavename] = slave
+
+ def removeSlave(self, slavename):
+ d = self.slaves[slavename].disconnect
+ del self.slaves[slavename]
+
def getBuildernames(self):
return self.builderNames
@@ -233,24 +309,26 @@
that build: the builds cannot be done until the right slave
connects."""
if self.debug: print "addBuilder", builder
+ log.msg("Botmaster.addBuilder(%s)" % builder.name)
+
if builder.name in self.builderNames:
- raise KeyError, "muliply defined builder '%s'" % builder.name
+ raise KeyError("muliply defined builder '%s'" % builder.name)
+ slavename = builder.slavename
+ if not self.slaves.has_key(slavename):
+ raise KeyError("builder %s uses undefined slave %s" % \
+ (builder.name, slavename))
+
self.builders[builder.name] = builder
self.builderNames.append(builder.name)
builder.setBotmaster(self)
self.checkInactiveInterlocks() # TODO?: do this in caller instead?
- if not self.slaveStatus.has_key(builder.slavename):
- # this is a new slave, create a SlaveStatus object for it
- s = SlaveStatus(builder.slavename)
- self.slaveStatus[builder.slavename] = s
- slave = self.slaves.get(builder.slavename)
- if slave:
- # there is an active slave which needs to be informed about the
- # new builder
- slave.addBuilder(builder)
+
+ slave = self.slaves[slavename]
+ slave.addBuilder(builder)
def removeBuilder(self, builder):
if self.debug: print "removeBuilder", builder
+ log.msg("Botmaster.removeBuilder(%s)" % builder.name)
b = self.builders[builder.name]
# any linked interlocks will be made inactive before the builder is
# removed
@@ -266,18 +344,9 @@
i.deactivate(self.builders)
del self.builders[builder.name]
self.builderNames.remove(builder.name)
- # check for an active slave to remove the builder from
- for slavename, slave in self.slaves.items():
- if slavename == builder.slavename:
- slave.removeBuilder(builder)
- # now see if this was the last builder to use the slave
- used = False
- for b in self.builders.values():
- if b.slavename == builder.slavename:
- used = True
- break
- if not used:
- del self.slaveStatus[builder.slavename]
+ slave = self.slaves.get(builder.slavename)
+ if slave:
+ slave.removeBuilder(builder)
def addInterlock(self, interlock):
"""This is called by the setup code to create build interlocks:
@@ -312,40 +381,7 @@
interlock.deactivate(self.builders)
def getPerspective(self, slavename):
- if self.slaves.has_key(slavename):
- # uh-oh, we've got a duplicate slave. Try to figure out where the
- # old one is coming from so we can explain the problem
- log.msg("duplicate slave %s trying to connect" % slavename)
- addr = self.slaves[slavename].slave.broker.transport.getPeer()
- log.msg("old slave is connected from", addr)
- # unfortunately the slave doesn't currently emit this message
- raise ValueError("duplicate slave, old one connected from %s" \
- % addr)
-
- slave_status = self.slaveStatus.get(slavename)
- if not slave_status:
- # TODO: this is probably broken w.r.t slaves connecting before
- # their builders have been configured, or vice versa
- slave_status = SlaveStatus(slavename)
- self.slaveStatus[slavename] = slave_status
- slave_status.connected = True
-
- # Find all the builders that want to use this slave
- builders = [b for (name, b) in self.builders.items()
- if b.slavename == slavename]
- p = BotPerspective(slavename, builders, slave_status)
- p.botmaster = self
- self.slaves[slavename] = p
- return p
-
- def detach(self, p):
- if not self.slaves[p.slavename] == p:
- # TODO: I saw this happen, but I don't know why
- log.msg("WEIRD, wrong slave '%s' saying goodbye" % p.slavename)
- log.msg(" original:", self.slaves[p.slavename])
- log.msg(" detaching:", p)
- self.slaveStatus[p.slavename].connected = False
- del self.slaves[p.slavename]
+ return self.slaves[slavename]
def addChange(self, change):
for b in self.builders.values():
@@ -459,6 +495,8 @@
def requestAvatar(self, avatarID, mind, interface):
assert interface == pb.IPerspective
+ log.msg("requestAvatar(%s) from %s" % \
+ (avatarID, mind.broker.transport.getPeer()))
afactory = self.names.get(avatarID)
if afactory:
p = afactory.getPerspective()
@@ -474,14 +512,14 @@
p = self.botmaster.getPerspective(avatarID)
if not p:
- raise ValueError, "no perspective for '%s'" % avatarID
- p.attached(mind) # perhaps .callLater(0) ?
- # TODO: especially for BotPerspectives
- # TODO: the slave might be removed from BotMaster.slaves by the time
- # the .detached callback is run, causing the assert in
- # BotMaster.detach to fail
- return (pb.IPerspective, p,
- lambda p=p,mind=mind: p.detached(mind))
+ raise ValueError("no perspective for '%s'" % avatarID)
+
+ d = defer.maybeDeferred(p.attached, mind)
+ d.addCallback(self._avatarAttached, mind)
+ return d
+
+ def _avatarAttached(self, p, mind):
+ return (pb.IPerspective, p, lambda p=p,mind=mind: p.detached(mind))
########################################
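The switch to defer.maybeDeferred above exists because attached() may now have to wait for an old duplicate slave to disconnect before handing back a perspective; maybeDeferred lets the caller treat an immediate result and a Deferred uniformly. A tiny standalone sketch (the two attach functions are made up for illustration):

from twisted.internet import defer

def sync_attach(mind):
    return "perspective"                 # plain value

def async_attach(mind):
    return defer.succeed("perspective")  # already a Deferred

for attach in (sync_attach, async_attach):
    d = defer.maybeDeferred(attach, None)
    d.addCallback(lambda p: p)           # both cases arrive here via a Deferred
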
@@ -537,6 +575,7 @@
self.statusTargets = []
+ self.bots = []
self.sources = []
self.readConfig = False
@@ -689,6 +728,12 @@
raise TypeError, "webPortnum '%s' must be an int" % webPortnum
for s in status:
assert interfaces.IStatusReceiver(s)
+ if 0: # tuple-specified builders are a problem
+ slavenames = [name for name,pw in bots]
+ for b in builders:
+ if b['slavename'] not in slavenames:
+ raise ValueError("builder %s uses undefined slave %s" \
+ % (b['name'], b['slavename']))
# now we're committed to implementing the new configuration, so do
# it atomically
@@ -700,13 +745,7 @@
# self.bots: Disconnect any that were attached and removed from the
# list. Update self.checker with the new list of passwords,
# including debug/change/status.
- self.checker.users = {} # violates abstraction, oh well
- for user, passwd in bots:
- self.checker.addUser(user, passwd)
- self.checker.addUser("change", "changepw")
-
- # TODO: hang up on old bots
- self.bots = bots
+ self.loadConfig_Slaves(bots)
# self.debugPassword
if debugPassword:
@@ -728,15 +767,7 @@
self.manhole = manhole
manhole.setServiceParent(self)
- # self.sources: shut down any that were removed, start any that were
- # added
- old = self.sources
- new = sources
- [self.change_svc.removeSource(source)
- for source in old if source not in new]
- [self.change_svc.addSource(source)
- for source in new if source not in old]
- self.sources = sources
+ self.loadConfig_Sources(sources)
# add/remove self.botmaster.builders to match builders. The
# botmaster will handle startup/shutdown issues.
@@ -761,6 +792,35 @@
log.msg("configuration updated")
+ def loadConfig_Slaves(self, bots):
+ # set up the Checker with the names and passwords of all valid bots
+ self.checker.users = {} # violates abstraction, oh well
+ for user, passwd in bots:
+ self.checker.addUser(user, passwd)
+ self.checker.addUser("change", "changepw")
+
+ # identify new/old bots
+ old = self.bots; oldnames = [name for name,pw in old]
+ new = bots; newnames = [name for name,pw in new]
+ # removeSlave will hang up on the old bot
+ [self.botmaster.removeSlave(name)
+ for name in oldnames if name not in newnames]
+ [self.botmaster.addSlave(name)
+ for name in newnames if name not in oldnames]
+
+ # all done
+ self.bots = bots
+
+ def loadConfig_Sources(self, sources):
+ # shut down any that were removed, start any that were added
+ old = self.sources
+ new = sources
+ [self.change_svc.removeSource(source)
+ for source in old if source not in new]
+ [self.change_svc.addSource(source)
+ for source in new if source not in old]
+ self.sources = sources
+
def loadConfig_Builders(self, newBuilders):
old = self.botmaster.getBuildernames()
newNames = []
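The bookkeeping in loadConfig_Slaves and loadConfig_Sources above is the same add/remove-by-difference idea; as a standalone sketch (the function name is illustrative only):

def diff_names(old, new):
    # Anything only in the old list gets removed, anything only in the
    # new list gets added; entries present in both are left alone.
    removed = [name for name in old if name not in new]
    added = [name for name in new if name not in old]
    return removed, added

# e.g. diff_names(['bot1', 'bot2'], ['bot2', 'bot3']) -> (['bot1'], ['bot3'])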