[Buildbot-commits] buildbot/buildbot/process base.py,1.59,1.60 builder.py,1.31,1.32

Brian Warner warner at users.sourceforge.net
Fri Oct 14 19:42:42 UTC 2005


Update of /cvsroot/buildbot/buildbot/buildbot/process
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32254/buildbot/process

Modified Files:
	base.py builder.py 
Log Message:
Revision: arch at buildbot.sf.net--2004/buildbot--dev--0--patch-326
Creator:  Brian Warner <warner at lothar.com>

implement multiple slaves per Builder, allowing concurrent Builds

	* lots: implement multiple slaves per Builder, which means multiple
	current builds per Builder. Some highlights:
	* buildbot/interfaces.py (IBuilderStatus.getState): return a tuple
	of (state,currentBuilds) instead of (state,currentBuild)
	(IBuilderStatus.getCurrentBuilds): replace getCurrentBuild()
	(IBuildStatus.getSlavename): new method, so you can tell which
	slave got used. The slavename only gets set when the build completes.
	(IBuildRequestStatus.getBuilds): new method

	* buildbot/process/builder.py (SlaveBuilder): add a .state
	attribute to track things like ATTACHING and IDLE and BUILDING,
	instead of..
	(Builder): .. the .slaves attribute here, which has been turned
	into a simple list of available slaves. Added a separate
	attaching_slaves list to track ones that are not yet ready for
	builds.
	(Builder.fireTestEvent): put off the test-event callback for a
	reactor turn, to make tests a bit more consistent.
	(Ping): cleaned up the slaveping a bit; it now disconnects the
	slave if the ping fails due to an exception. This needs work: I'm
	worried that a code error could lead to a constantly re-connecting
	slave, especially since I'm trying to move to a distinct
	remote_ping method, separate from the remote_print that we
	currently use.
	(BuilderControl.requestBuild): return a convenience Deferred that
	provides an IBuildStatus when the build finishes.
	(BuilderControl.ping): ping all connected slaves, and only return
	True if they all respond.

	* buildbot/slave/bot.py (BuildSlave.stopService): stop trying to
	reconnect when we shut down.

	* buildbot/status/builder.py: implement new methods, convert
	one-build-at-a-time methods to handle multiple builds
	* buildbot/status/*.py: do the same in all default status targets
	* buildbot/status/html.py: report the build's slavename in the
	per-Build page, report all buildslaves on the per-Builder page

	* buildbot/test/test_run.py: update/create tests
	* buildbot/test/test_slaves.py: same
	* buildbot/test/test_scheduler.py: remove stale test

	* docs/buildbot.texinfo: document the new builder-specification
	'slavenames' parameter


Index: base.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/process/base.py,v
retrieving revision 1.59
retrieving revision 1.60
diff -u -d -r1.59 -r1.60
--- base.py	7 Oct 2005 18:37:21 -0000	1.59
+++ base.py	14 Oct 2005 19:42:40 -0000	1.60
@@ -212,7 +212,7 @@
         return files
 
     def __repr__(self):
-        return "<Build %s>" % (self.builder.name)
+        return "<Build %s>" % (self.builder.name,)
 
     def __getstate__(self):
         d = self.__dict__.copy()
@@ -266,6 +266,7 @@
         log.msg("%s.startBuild" % self)
         self.build_status = build_status
         self.slavebuilder = slavebuilder
+        self.slavename = slavebuilder.slave.slavename
         self.locks = [l.getLock(self.slavebuilder) for l in self.locks]
         self.remote = slavebuilder.remote
         self.remote.notifyOnDisconnect(self.lostRemote)
@@ -509,6 +510,7 @@
         self.results = results
 
         log.msg(" %s: build finished" % self)
+        self.build_status.setSlavename(self.slavename)
         self.build_status.setText(text)
         self.build_status.setColor(color)
         self.build_status.setResults(results)

Index: builder.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/process/builder.py,v
retrieving revision 1.31
retrieving revision 1.32
diff -u -d -r1.31 -r1.32
--- builder.py	7 Oct 2005 18:37:21 -0000	1.31
+++ builder.py	14 Oct 2005 19:42:40 -0000	1.32
@@ -13,12 +13,19 @@
 from buildbot.util import now
 from buildbot.process import base
 
+(ATTACHING, # slave attached, still checking hostinfo/etc
+ IDLE, # idle, available for use
+ PINGING, # build about to start, making sure it is still alive
+ BUILDING, # build is running
+ ) = range(4)
+
 class SlaveBuilder(pb.Referenceable):
     """I am the master-side representative for one of the
     L{buildbot.slave.bot.SlaveBuilder} objects that lives in a remote
     buildbot. When a remote builder connects, I query it for command versions
     and then make it available to any Builds that are ready to run. """
 
+    state = ATTACHING
     remote = None
     build = None
 
@@ -60,6 +67,8 @@
         return why
 
     def detached(self):
+        log.msg("Buildslave %s detached from %s" % (self.slave.slavename,
+                                                    self.builder.name))
         self.slave = None
         self.remote = None
         self.remoteCommands = None
@@ -117,7 +126,11 @@
         self.running = True
         log.msg("sending ping")
         self.d = defer.Deferred()
-        remote.callRemote("print", "ping").addBoth(self._pong)
+        # TODO: add a distinct 'ping' command on the slave.. using 'print'
+        # for this purpose is kind of silly.
+        remote.callRemote("print", "ping").addCallbacks(self._pong,
+                                                        self._ping_failed,
+                                                        errbackArgs=(remote,))
 
         # We use either our own timeout or the (long) TCP timeout to detect
         # silently-missing slaves. This might happen because of a NAT
@@ -135,20 +148,31 @@
         remote.broker.transport.loseConnection()
         # the forcibly-lost connection will now cause the ping to fail
 
-    def _pong(self, res):
+    def _stopTimer(self):
         if not self.running:
             return
         self.running = False
 
-        log.msg("ping finished")
         if self.timer:
             self.timer.cancel()
             del self.timer
 
-        if isinstance(res, failure.Failure):
-            self.d.callback(False)
-        else:
-            self.d.callback(True)
+    def _pong(self, res):
+        log.msg("ping finished: success")
+        self._stopTimer()
+        self.d.callback(True)
+
+    def _ping_failed(self, res, remote):
+        log.msg("ping finished: failure")
+        self._stopTimer()
+        # the slave has some sort of internal error, disconnect them. If we
+        # don't, we'll requeue a build and ping them again right away,
+        # creating a nasty loop.
+        remote.broker.transport.loseConnection()
+        # TODO: except, if they actually did manage to get this far, they'll
+        # probably reconnect right away, and we'll do this game again. Maybe
+        # it would be better to leave them in the PINGING state.
+        self.d.callback(False)
 
 
 class Builder(pb.Referenceable):
@@ -172,9 +196,9 @@
     discard it.
 
     I maintain a list of available SlaveBuilders, one for each connected
-    slave that the C{slavename} parameter says we can use. Some of these will
-    be idle, some of them will be busy running builds for me. If there are
-    multiple slaves, I can run multiple builds at once.
+    slave that the C{slavenames} parameter says we can use. Some of these
+    will be idle, some of them will be busy running builds for me. If there
+    are multiple slaves, I can run multiple builds at once.
 
     I also manage forced builds, progress expectation (ETA) management, and
     some status delivery chores.
@@ -201,11 +225,15 @@
         @type  setup: dict
         @param setup: builder setup data, as stored in
                       BuildmasterConfig['builders'].  Contains name,
-                      slavename, builddir, factory, locks.
+                      slavename(s), builddir, factory, locks.
         @type  builder_status: L{buildbot.status.builder.BuilderStatus}
         """
         self.name = setup['name']
-        self.slavename = setup['slavename']
+        self.slavenames = []
+        if setup.has_key('slavename'):
+            self.slavenames.append(setup['slavename'])
+        if setup.has_key('slavenames'):
+            self.slavenames.extend(setup['slavenames'])
         self.builddir = setup['builddir']
         self.buildFactory = setup['factory']
         self.locks = setup.get("locks", [])
@@ -218,26 +246,34 @@
         self.buildable = []
         self.building = []
 
-        # buildslaves at our disposal. This maps SlaveBuilder instances to
-        # state, where state is one of "attaching", "idle", "pinging",
-        # "busy". "pinging" is used when a Build is about to start, to make
-        # sure that they're still alive.
-        self.slaves = {} 
+        # buildslaves which have connected but which are not yet available.
+        # These are always in the ATTACHING state.
+        self.attaching_slaves = []
+
+        # buildslaves at our disposal. Each SlaveBuilder instance has a
+        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
+        # Build is about to start, to make sure that they're still alive.
+        self.slaves = []
 
         self.builder_status = builder_status
-        self.builder_status.setSlavename(self.slavename)
+        self.builder_status.setSlavenames(self.slavenames)
 
         # for testing, to help synchronize tests
-        self.watchers = {'attach': [], 'detach': [], 'idle': []}
+        self.watchers = {'attach': [], 'detach': [], 'detach_all': [],
+                         'idle': []}
 
     def setBotmaster(self, botmaster):
         self.botmaster = botmaster
 
     def compareToSetup(self, setup):
         diffs = []
-        if setup['slavename'] != self.slavename:
-            diffs.append('slavename changed from %s to %s' \
-                         % (self.slavename, setup['slavename']))
+        setup_slavenames = []
+        if setup.has_key('slavename'):
+            setup_slavenames.append(setup['slavename'])
+        setup_slavenames.extend(setup.get('slavenames', []))
+        if setup_slavenames != self.slavenames:
+            diffs.append('slavenames changed from %s to %s' \
+                         % (self.slavenames, setup_slavenames))
         if setup['builddir'] != self.builddir:
             diffs.append('builddir changed from %s to %s' \
                          % (self.builddir, setup['builddir']))
@@ -279,7 +315,7 @@
     def __setstate__(self, d):
         self.__dict__ = d
         self.building = []
-        self.slaves = {}
+        self.slaves = []
 
     def fireTestEvent(self, name, with=None):
         if with is None:
@@ -287,7 +323,7 @@
         watchers = self.watchers[name]
         self.watchers[name] = []
         for w in watchers:
-            w.callback(with)
+            reactor.callLater(0, w.callback, with)
 
     def attached(self, slave, remote, commands):
         """This is invoked by the BotPerspective when the self.slavename bot
@@ -305,7 +341,7 @@
         @return: a Deferred that fires (with 'self') when the slave-side
                  builder is fully attached and ready to accept commands.
         """
-        for s in self.slaves.keys():
+        for s in self.attaching_slaves + self.slaves:
             if s.slave == slave:
                 # already attached to them. This is fairly common, since
                 # attached() gets called each time we receive the builder
@@ -319,14 +355,14 @@
                 # Therefore, when we see that we're already attached, we can
                 # just ignore it. TODO: build a diagram of the state
                 # transitions here, I'm concerned about sb.attached() failing
-                # and leaving self.slaves[sb] stuck at 'attaching', and about
+                # and leaving sb.state stuck at 'ATTACHING', and about
                 # the detached() message arriving while there's some
                 # transition pending such that the response to the transition
-                # re-vivifies self.slaves[sb]
+                # re-vivifies sb
                 return defer.succeed(self)
 
         sb = SlaveBuilder(self)
-        self.slaves[sb] = "attaching"
+        self.attaching_slaves.append(sb)
         d = sb.attached(slave, remote, commands)
         d.addCallback(self._attached)
         d.addErrback(self._not_attached, slave)
@@ -335,7 +371,9 @@
     def _attached(self, sb):
         # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
         self.builder_status.addPointEvent(['connect', sb.slave.slavename])
-        self.slaves[sb] = "idle"
+        sb.state = IDLE
+        self.attaching_slaves.remove(sb)
+        self.slaves.append(sb)
         self.maybeStartBuild()
 
         self.fireTestEvent('attach')
@@ -354,10 +392,17 @@
     def detached(self, slave):
         """This is called when the connection to the bot is lost."""
         log.msg("%s.detached" % self, slave.slavename)
-        for sb in self.slaves.keys():
+        for sb in self.attaching_slaves + self.slaves:
             if sb.slave == slave:
                 break
-        if self.slaves[sb] == "busy":
+        else:
+            log.msg("WEIRD: Builder.detached(%s) (%s)"
+                    " not in attaching_slaves(%s)"
+                    " or slaves(%s)" % (slave, slave.slavename,
+                                        self.attaching_slaves,
+                                        self.slaves))
+            return
+        if sb.state == BUILDING:
             # the Build's .lostRemote method (invoked by a notifyOnDisconnect
             # handler) will cause the Build to be stopped, probably right
             # after the notifyOnDisconnect that invoked us finishes running.
@@ -366,13 +411,18 @@
             #self.retryBuild(sb.build)
             pass
 
-        del self.slaves[sb]
+        if sb in self.attaching_slaves:
+            self.attaching_slaves.remove(sb)
+        if sb in self.slaves:
+            self.slaves.remove(sb)
 
         # TODO: make this .addSlaveEvent?
         self.builder_status.addPointEvent(['disconnect', slave.slavename])
         sb.detached() # inform the SlaveBuilder that their slave went away
         self.updateBigStatus()
         self.fireTestEvent('detach')
+        if not self.slaves:
+            self.fireTestEvent('detach_all')
 
     def updateBigStatus(self):
         if not self.slaves:
@@ -388,14 +438,15 @@
         if not self.buildable:
             self.updateBigStatus()
             return # nothing to do
-        idle_slaves = [sb for sb in self.slaves.keys()
-                       if self.slaves[sb] == "idle"]
-        if not idle_slaves:
+        # find the first idle slave
+        for sb in self.slaves:
+            if sb.state == IDLE:
+                break
+        else:
             log.msg("%s: want to start build, but we don't have a remote"
                     % self)
             self.updateBigStatus()
             return
-        sb = idle_slaves[0]
 
         # there is something to build, and there is a slave on which to build
         # it. Grab the oldest request, see if we can merge it with anything
@@ -431,8 +482,9 @@
 
         self.building.append(build)
 
-        # claim the slave
-        self.slaves[sb] = "pinging"
+        # claim the slave. TODO: consider moving changes to sb.state inside
+        # SlaveBuilder.. that would be cleaner.
+        sb.state = PINGING
         sb.startBuild(build)
 
         self.updateBigStatus()
@@ -451,7 +503,7 @@
         if not res:
             return self._startBuildFailed("slave ping failed", build, sb)
         # The buildslave is ready to go.
-        self.slaves[sb] = "building"
+        sb.state = BUILDING
         d = sb.remote.callRemote("startBuild")
         d.addCallbacks(self._startBuild_2, self._startBuildFailed,
                        callbackArgs=(build,sb), errbackArgs=(build,sb))
@@ -479,14 +531,7 @@
                 "remote_startBuild failed: %s" % (build, why))
         # release the slave
         sb.finishBuild()
-        if sb in self.slaves:
-            self.slaves[sb] = "idle"
-        else:
-            # if the startBuild message failed because we lost the slave, our
-            # detacted() method will have already fired, removing the
-            # SlaveBuilder from self.slaves . This test is here to make sure
-            # we don't re-create the old self.slaves[sb] entry.
-            pass
+        sb.state = IDLE
 
         log.msg("re-queueing the BuildRequest")
         self.building.remove(build)
@@ -509,8 +554,7 @@
 
         # release the slave
         sb.finishBuild()
-        if sb in self.slaves:
-            self.slaves[sb] = "idle"
+        sb.state = IDLE
         # otherwise the slave probably got removed in detach()
 
         self.building.remove(build)
@@ -561,12 +605,17 @@
 
         warnings.warn("Please use BuilderControl.requestBuild instead",
                       category=DeprecationWarning, stacklevel=1)
-        idle_slaves = [sb for sb in self.original.slaves
-                       if self.original.slaves[sb] == "idle"]
-        if not idle_slaves:
+
+        # see if there is an idle slave, so we can emit an appropriate error
+        # message
+        for sb in self.original.slaves:
+            if sb.state == IDLE:
+                break
+        else:
             if self.original.building:
                 raise interfaces.BuilderInUseError("All slaves are in use")
             raise interfaces.NoSlaveError("There are no slaves connected")
+
         req = base.BuildRequest(reason, sourcestamp.SourceStamp())
         self.requestBuild(req)
         # this is a hack that fires the Deferred for the first build and
@@ -587,7 +636,13 @@
         return w.wait()
 
     def requestBuild(self, req):
+        """Submit a BuildRequest to this Builder. Returns a Deferred that
+        fires when the BuildRequest finishes, the same as doing
+        req.waitUntilFinished . This Deferred will fire with an
+        L{buildbot.interfaces.IBuildStatus} instance."""
+        d = req.waitUntilFinished()
         self.original.submitBuildRequest(req)
+        return d
 
     def getPendingBuilds(self):
         # return IBuildRequestControl objects
@@ -604,8 +659,17 @@
             self.original.builder_status.addPointEvent(["ping", "no slave"],
                                                        "red")
             return defer.succeed(False) # interfaces.NoSlaveError
-        d = self.original.slaves.keys()[0].ping(timeout,
-                                                self.original.builder_status)
+        dl = []
+        for s in self.original.slaves:
+            dl.append(s.ping(timeout, self.original.builder_status))
+        d = defer.DeferredList(dl)
+        d.addCallback(self._gatherPingResults)
         return d
 
+    def _gatherPingResults(self, res):
+        for ignored,success in res:
+            if not success:
+                return False
+        return True
+
 components.registerAdapter(BuilderControl, Builder, interfaces.IBuilderControl)





More information about the Commits mailing list