[Buildbot-commits] buildbot/buildbot interfaces.py,1.20,1.21 master.py,1.54,1.55

Brian Warner warner at users.sourceforge.net
Fri Dec 3 22:54:53 UTC 2004


Update of /cvsroot/buildbot/buildbot/buildbot
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20004/buildbot

Modified Files:
	interfaces.py master.py 
Log Message:
Make commands (and builds) interruptible. Improve lost-slave behavior.
Merging in several days of changes from local Arch branch, see ChangeLog for
details about individual files.


Index: interfaces.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/interfaces.py,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -d -r1.20 -r1.21
--- interfaces.py	15 Oct 2004 16:59:44 -0000	1.20
+++ interfaces.py	3 Dec 2004 22:54:50 -0000	1.21
@@ -603,8 +603,18 @@
         further control the new build, or from which an IBuildStatus object
         can be obtained."""
 
-    def ping():
-        """Attempt to contact the slave and see if it is still alive."""
+    def getBuild(number):
+        """Attempt to return an IBuildControl object for the given build.
+        Returns None if no such object is available. This will only work for
+        the build that is currently in progress: once the build finishes,
+        there is nothing to control anymore."""
+
+    def ping(timeout=30):
+        """Attempt to contact the slave and see if it is still alive. This
+        returns a Deferred which fires with either True (the slave is still
+        alive) or False (the slave did not respond). As a side effect, adds
+        an event to this builder's column in the waterfall display
+        containing the results of the ping."""
         # TODO: this ought to live in ISlaveControl, maybe with disconnect()
         # or something. However the event that is emitted is most useful in
         # the Builder column, so it kinda fits here too.
@@ -612,3 +622,7 @@
 class IBuildControl(Interface):
     def getStatus():
         """Return an IBuildStatus object for the Build that I control."""
+    def stopBuild(reason="<no reason given>"):
+        """Halt the build. This has no effect if the build has already
+        finished."""
+        

Index: master.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/master.py,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -d -r1.54 -r1.55
--- master.py	14 Oct 2004 16:47:33 -0000	1.54
+++ master.py	3 Dec 2004 22:54:50 -0000	1.55
@@ -38,28 +38,30 @@
 
 class BotPerspective(NewCredPerspective):
     """This is the master-side representative for a remote buildbot slave.
-    When buildbots connect in, they get a reference to a new instance of
-    this class. The BotMaster object is stashed as the .service
-    attribute."""
+    There is exactly one for each slave described in the config file (the
+    c['bots'] list). When buildbots connect in (.attach), they get a
+    reference to this instance. The BotMaster object is stashed as the
+    .service attribute."""
 
     slave_commands = None
 
-    def __init__(self, slavename, builders, slave_status):
-        self.slavename = slavename
-        self.builders = builders
-        self.slave_status = slave_status
+    def __init__(self, name):
+        self.slavename = name
+        self.slave_status = SlaveStatus(name)
+        self.builders = [] # list of b.p.builder.Builder instances
+        self.slave = None # a RemoteReference to the Bot, when connected
 
     def addBuilder(self, builder):
         """Called to add a builder after the slave has connected."""
         self.builders.append(builder)
-        # TODO: resync with slave, to accomodate builders added after
-        # attach
-        self.sendBuilderList()
+        if self.slave:
+            self.sendBuilderList()
 
     def removeBuilder(self, builder):
         self.builders.remove(builder)
-        builder.detached()
-        self.sendBuilderList()
+        if self.slave:
+            builder.detached()
+            self.sendBuilderList()
 
     def __repr__(self):
         return "<BotPerspective '%s', builders: %s>" % \
@@ -67,14 +69,78 @@
                 string.join(map(lambda b: b.name, self.builders), ','))
 
     def attached(self, mind):
-        # this is called shortly after the slave connects. We go through a
-        # sequence of calls, gathering information, then tell our Builders
-        # that they have a slave to work with.
+        # this is called when the slave connects. It returns a Deferred that
+        # fires with a suitable pb.IPerspective to give to the slave (i.e.
+        # 'self')
+
+        if self.slave:
+            # uh-oh, we've got a duplicate slave. The most likely
+            # explanation is that the slave is behind a slow link, thinks we
+            # went away, and has attempted to reconnect, so we've got two
+            # "connections" from the same slave, but the previous one is
+            # stale. Give the new one precedence.
+            log.msg("duplicate slave %s replacing old one" % self.slavename)
+
+            # just in case we've got two identically-configured slaves,
+            # report the IP addresses of both so someone can resolve the
+            # squabble
+            tport = self.slave.broker.transport
+            log.msg("old slave was connected from", tport.getPeer())
+            log.msg("new slave is from", mind.broker.transport.getPeer())
+            d = self.disconnect()
+            d.addCallback(lambda res: self._attached(mind))
+            return d
+
+        self._attached(mind)
+        return defer.succeed(self)
+
+    def disconnect(self):
+        log.msg("disconnecting old slave %s now" % self.slavename)
+
+        # all kinds of teardown will happen as a result of
+        # loseConnection(), but it happens after a reactor iteration or
+        # two. Hook the actual disconnect so we can know when it is safe
+        # to connect the new slave. We have to wait one additional
+        # iteration (with callLater(0)) to make sure the *other*
+        # notifyOnDisconnect handlers have had a chance to run.
+        d = defer.Deferred()
+
+        self.slave.notifyOnDisconnect(lambda res: # TODO: d=d ?
+                                      reactor.callLater(0, d.callback, None))
+        tport = self.slave.broker.transport
+        # this is the polite way to request that a socket be closed
+        tport.loseConnection()
+        try:
+            # but really we don't want to wait for the transmit queue to
+            # drain. The remote end is unlikely to ACK the data, so we'd
+            # probably have to wait for a (20-minute) TCP timeout.
+            #tport._closeSocket()
+            # however, doing _closeSocket (whether before or after
+            # loseConnection) somehow prevents the notifyOnDisconnect
+            # handlers from being run. Bummer.
+            tport.offset = 0
+            tport.dataBuffer = ""
+            pass
+        except:
+            # however, these hacks are pretty internal, so don't blow up if
+            # they fail or are unavailable
+            log.msg("failed to accelerate the shutdown process")
+            pass
+        log.msg("waiting for slave to finish disconnecting")
+
+        # When this Deferred fires, we'll be ready to accept the new slave
+        return d
+
+    def _attached(self, mind):
+        # We go through a sequence of calls, gathering information, then
+        # tell our Builders that they have a slave to work with.
         self.slave = mind
         self.slave.callRemote("print", "attached").addErrback(lambda why: 0)
         self.slave_status.connected = True
         log.msg("bot attached")
 
+        # TODO: there is a window here (while we're retrieving slaveinfo)
+        # during which a disconnect or a duplicate-slave will be confusing
         d = self.slave.callRemote("getSlaveInfo")
         d.addCallback(self.got_info)
         d.addErrback(self.infoUnavailable)
@@ -149,13 +215,16 @@
         for name, remote in blist.items():
             for b in self.builders:
                 if b.name == name:
+                    # if we sent the builders list because of a config
+                    # change, the Builder might already be attached.
+                    # Builder.attached will ignore us if this happens.
                     b.attached(remote, self.slave_commands)
                     continue
 
     def _listFailed(self, why):
         log.msg("BotPerspective._listFailed")
         log.err(why)
-        # TODO: hand up on them, without setBuilderList we can't use them
+        # TODO: hang up on them, without setBuilderList we can't use them
 
     def perspective_forceBuild(self, name, who=None):
         # slave admins are allowed to force any of their own builds
@@ -176,13 +245,11 @@
         pass
 
     def detached(self, mind):
+        self.slave = None
         self.slave_status.connected = False
-        self.botmaster.detach(self)
         for b in self.builders:
             b.detached()
-        self.builders = []
-        log.msg("bot detached")
-        # this perspective goes away now
+        log.msg("Botmaster.detached(%s)" % self.slavename)
 
     
 class BotMaster(service.Service):
@@ -204,8 +271,7 @@
         # which is the master-side object that defines and controls a build.
         # They are added by calling botmaster.addBuilder() from the startup
         # code.
-        self.slaves = {}
-        self.slaveStatus = {}
+        self.slaves = {} # maps slavename to BotPerspective
         self.interlocks = {}
         self.statusClientService = None
         self.watchers = {}
@@ -220,10 +286,20 @@
     def waitUntilBuilderDetached(self, name):
         # convenience function for testing
         d = defer.Deferred()
-        b = self.builders[name]
+        b = self.builders.get(name, None)
+        if not b or not b.remote:
+            return defer.succeed(None)
         b.watchers['detach'].append(d)
         return d
 
+    def addSlave(self, slavename):
+        slave = BotPerspective(slavename)
+        self.slaves[slavename] = slave
+
+    def removeSlave(self, slavename):
+        d = self.slaves[slavename].disconnect
+        del self.slaves[slavename]
+
     def getBuildernames(self):
         return self.builderNames
 
@@ -233,24 +309,26 @@
         that build: the builds cannot be done until the right slave
         connects."""
         if self.debug: print "addBuilder", builder
+        log.msg("Botmaster.addBuilder(%s)" % builder.name)
+
         if builder.name in self.builderNames:
-            raise KeyError, "muliply defined builder '%s'" % builder.name
+            raise KeyError("muliply defined builder '%s'" % builder.name)
+        slavename = builder.slavename
+        if not self.slaves.has_key(slavename):
+            raise KeyError("builder %s uses undefined slave %s" % \
+                           (builder.name, slavename))
+
         self.builders[builder.name] = builder
         self.builderNames.append(builder.name)
         builder.setBotmaster(self)
         self.checkInactiveInterlocks() # TODO?: do this in caller instead?
-        if not self.slaveStatus.has_key(builder.slavename):
-            # this is a new slave, create a SlaveStatus object for it
-            s = SlaveStatus(builder.slavename)
-            self.slaveStatus[builder.slavename] = s
-        slave = self.slaves.get(builder.slavename)
-        if slave:
-            # there is an active slave which needs to be informed about the
-            # new builder
-            slave.addBuilder(builder)
+
+        slave = self.slaves[slavename]
+        slave.addBuilder(builder)
 
     def removeBuilder(self, builder):
         if self.debug: print "removeBuilder", builder
+        log.msg("Botmaster.removeBuilder(%s)" % builder.name)
         b = self.builders[builder.name]
         # any linked interlocks will be made inactive before the builder is
         # removed
@@ -266,18 +344,9 @@
             i.deactivate(self.builders)
         del self.builders[builder.name]
         self.builderNames.remove(builder.name)
-        # check for an active slave to remove the builder from
-        for slavename, slave in self.slaves.items():
-            if slavename == builder.slavename:
-                slave.removeBuilder(builder)
-        # now see if this was the last builder to use the slave
-        used = False
-        for b in self.builders.values():
-            if b.slavename == builder.slavename:
-                used = True
-                break
-        if not used:
-            del self.slaveStatus[builder.slavename]
+        slave = self.slaves.get(builder.slavename)
+        if slave:
+            slave.removeBuilder(builder)
 
     def addInterlock(self, interlock):
         """This is called by the setup code to create build interlocks:
@@ -312,40 +381,7 @@
             interlock.deactivate(self.builders)
 
     def getPerspective(self, slavename):
-        if self.slaves.has_key(slavename):
-            # uh-oh, we've got a duplicate slave. Try to figure out where the
-            # old one is coming from so we can explain the problem
-            log.msg("duplicate slave %s trying to connect" % slavename)
-            addr = self.slaves[slavename].slave.broker.transport.getPeer()
-            log.msg("old slave is connected from", addr)
-            # unfortunately the slave doesn't currently emit this message
-            raise ValueError("duplicate slave, old one connected from %s" \
-                             % addr)
-
-        slave_status = self.slaveStatus.get(slavename)
-        if not slave_status:
-            # TODO: this is probably broken w.r.t slaves connecting before
-            # their builders have been configured, or vice versa
-            slave_status = SlaveStatus(slavename)
-            self.slaveStatus[slavename] = slave_status
-        slave_status.connected = True
-
-        # Find all the builders that want to use this slave
-        builders = [b for (name, b) in self.builders.items()
-                    if b.slavename == slavename]
-        p = BotPerspective(slavename, builders, slave_status)
-        p.botmaster = self
-        self.slaves[slavename] = p
-        return p
-
-    def detach(self, p):
-        if not self.slaves[p.slavename] == p:
-            # TODO: I saw this happen, but I don't know why
-            log.msg("WEIRD, wrong slave '%s' saying goodbye" % p.slavename)
-            log.msg(" original:", self.slaves[p.slavename])
-            log.msg(" detaching:", p)
-        self.slaveStatus[p.slavename].connected = False
-        del self.slaves[p.slavename]
+        return self.slaves[slavename]
 
     def addChange(self, change):
         for b in self.builders.values():
@@ -459,6 +495,8 @@
 
     def requestAvatar(self, avatarID, mind, interface):
         assert interface == pb.IPerspective
+        log.msg("requestAvatar(%s) from %s" % \
+                (avatarID, mind.broker.transport.getPeer()))
         afactory = self.names.get(avatarID)
         if afactory:
             p = afactory.getPerspective()
@@ -474,14 +512,14 @@
             p = self.botmaster.getPerspective(avatarID)
 
         if not p:
-            raise ValueError, "no perspective for '%s'" % avatarID
-        p.attached(mind) # perhaps .callLater(0) ?
-        # TODO: especially for BotPerspectives
-        # TODO: the slave might be removed from BotMaster.slaves by the time
-        #  the .detached callback is run, causing the assert in
-        #  BotMaster.detach to fail
-        return (pb.IPerspective, p,
-                lambda p=p,mind=mind: p.detached(mind))
+            raise ValueError("no perspective for '%s'" % avatarID)
+
+        d = defer.maybeDeferred(p.attached, mind)
+        d.addCallback(self._avatarAttached, mind)
+        return d
+
+    def _avatarAttached(self, p, mind):
+        return (pb.IPerspective, p, lambda p=p,mind=mind: p.detached(mind))
 
 ########################################
 
@@ -537,6 +575,7 @@
 
         self.statusTargets = []
 
+        self.bots = []
         self.sources = []
 
         self.readConfig = False
@@ -689,6 +728,12 @@
             raise TypeError, "webPortnum '%s' must be an int" % webPortnum
         for s in status:
             assert interfaces.IStatusReceiver(s)
+        if 0: # tuple-specified builders are a problem
+            slavenames = [name for name,pw in bots]
+            for b in builders:
+                if b['slavename'] not in slavenames:
+                    raise ValueError("builder %s uses undefined slave %s" \
+                                     % (b['name'], b['slavename']))
 
         # now we're committed to implementing the new configuration, so do
         # it atomically
@@ -700,13 +745,7 @@
         # self.bots: Disconnect any that were attached and removed from the
         # list. Update self.checker with the new list of passwords,
         # including debug/change/status.
-        self.checker.users = {} # violates abstraction, oh well
-        for user, passwd in bots:
-            self.checker.addUser(user, passwd)
-        self.checker.addUser("change", "changepw")
-
-        # TODO: hang up on old bots
-        self.bots = bots
+        self.loadConfig_Slaves(bots)
 
         # self.debugPassword
         if debugPassword:
@@ -728,15 +767,7 @@
                 self.manhole = manhole
                 manhole.setServiceParent(self)
 
-        # self.sources: shut down any that were removed, start any that were
-        # added
-        old = self.sources
-        new = sources
-        [self.change_svc.removeSource(source)
-         for source in old if source not in new]
-        [self.change_svc.addSource(source)
-         for source in new if source not in old]
-        self.sources = sources
+        self.loadConfig_Sources(sources)
 
         # add/remove self.botmaster.builders to match builders. The
         # botmaster will handle startup/shutdown issues.
@@ -761,6 +792,35 @@
         
         log.msg("configuration updated")
 
+    def loadConfig_Slaves(self, bots):
+        # set up the Checker with the names and passwords of all valid bots
+        self.checker.users = {} # violates abstraction, oh well
+        for user, passwd in bots:
+            self.checker.addUser(user, passwd)
+        self.checker.addUser("change", "changepw")
+
+        # identify new/old bots
+        old = self.bots; oldnames = [name for name,pw in old]
+        new = bots; newnames = [name for name,pw in new]
+        # removeSlave will hang up on the old bot
+        [self.botmaster.removeSlave(name)
+         for name in oldnames if name not in newnames]
+        [self.botmaster.addSlave(name)
+         for name in newnames if name not in oldnames]
+
+        # all done
+        self.bots = bots
+
+    def loadConfig_Sources(self, sources):
+        # shut down any that were removed, start any that were added
+        old = self.sources
+        new = sources
+        [self.change_svc.removeSource(source)
+         for source in old if source not in new]
+        [self.change_svc.addSource(source)
+         for source in new if source not in old]
+        self.sources = sources
+
     def loadConfig_Builders(self, newBuilders):
         old = self.botmaster.getBuildernames()
         newNames = []





More information about the Commits mailing list