[Buildbot-commits] buildbot/buildbot/slave commands.py,1.17,1.18 bot.py,1.4,1.5

Brian Warner warner at users.sourceforge.net
Sat Dec 4 21:02:06 UTC 2004


Update of /cvsroot/buildbot/buildbot/buildbot/slave
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14277/buildbot/slave

Modified Files:
	commands.py bot.py 
Log Message:
* buildbot/slave/bot.py: clean up shutdown/lose-master code
(SlaveBuilder): make some attributes class-level, remove the old
"update queue" which existed to support resuming a build after the
master connection was lost. Try to reimplement that feature later.
(SlaveBuilder.stopCommand): clear self.command when the
SlaveCommand finishes, so that we don't try to kill a leftover one
at shutdown time.
(SlaveBuilder.commandComplete): same, merge with commandFailed and
.finishCommand

* buildbot/slave/commands.py (SourceBase): set self.command for
all VC commands, so they can be interrupted.


Index: bot.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/slave/bot.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- bot.py	3 Dec 2004 22:54:52 -0000	1.4
+++ bot.py	4 Dec 2004 21:02:03 -0000	1.5
@@ -44,32 +44,27 @@
 
     stopCommandOnShutdown = True
 
+    # remote is a ref to the Builder object on the master side, and is set
+    # when they attach. We use it to detect when the connection to the master
+    # is severed.
+    remote = None
+
+    # .build points to a SlaveBuild object, a new one for each build
+    build = None
+
+    # .command points to a SlaveCommand instance, and is set while the step
+    # is running. We use it to implement the stopBuild method.
+    command = None
+
+    # .remoteStep is a ref to the master-side BuildStep object, and is set
+    # when the step is started
+    remoteStep = None
+
     def __init__(self, parent, name, builddir, not_really):
         #service.Service.__init__(self)
         self.name = name
         self.builddir = builddir
         self.not_really = not_really
-        self.remote = None
-        # remote is a ref to the Builder object on the master side, and is
-        # set when they attach. It really isn't used very much
-        self.build = None
-        # .build points to a SlaveBuild object, a new one for each build
-        self.command = None
-        # .command points to a SlaveCommand instance, and is set when the
-        # step is started. It remains until the step is completed and the
-        # completion is acknowledged (via .ackComplete)
-        self.remoteStep = None
-        # .remoteStep is a ref to the master-side BuildStep object, and is
-        # set when the step is started
-        self.updateNum = None
-        # .updateNum counts status updates. It is reset to zero at the
-        # beginning of each step. Numbering the updates makes it possible to
-        # reattach to a master that has been restarted
-        self.updateQueue = []
-        # unacknowledged status updates are kept in .updateQueue, and are
-        # removed by .ackUpdate
-        self.complete = None
-        # an unacknowledged completion message lives in .complete
         
     def __repr__(self):
         return "<SlaveBuilder '%s'>" % self.name
@@ -83,9 +78,8 @@
 
     def stopService(self):
         service.Service.stopService(self)
-        if self.command and self.stopCommandOnShutdown:
+        if self.stopCommandOnShutdown:
             self.stopCommand()
-            self.command = None
 
     def remote_setMaster(self, remote):
         self.remote = remote
@@ -100,7 +94,7 @@
     def lostRemoteStep(self, remotestep):
         log.msg("lost remote step")
         self.remoteStep = None
-        if self.command and self.stopCommandOnShutdown:
+        if self.stopCommandOnShutdown:
             self.stopCommand()
         
     # the following are Commands that can be invoked by the master-side
@@ -111,6 +105,7 @@
         one step to the next."""
         self.build = SlaveBuild(self)
         log.msg("startBuild")
+
     def remote_startCommand(self, stepref, stepId, command, args):
         """This is called multiple times by various master-side BuildSteps,
         to start various commands that actually do the build."""
@@ -118,7 +113,6 @@
         if self.command:
             log.msg("leftover command, dropping it")
             self.stopCommand()
-            self.command = None
 
         try:
             factory, version = registry.commandRegistry[command]
@@ -127,14 +121,12 @@
         self.command = factory(self, stepId, args)
 
         log.msg(" startCommand:%s [id %s]" % (command,stepId))
-        self.updateQueue = []
         self.remoteStep = stepref
         self.remoteStep.notifyOnDisconnect(self.lostRemoteStep)
-        self.updateNum = 0
-        self.complete = None
         self.command.running = True
         d = defer.maybeDeferred(self.command.start)
-        d.addCallbacks(self.commandComplete, self.commandFailed)
+        d.addCallback(lambda res: None)
+        d.addBoth(self.commandComplete)
         return None
 
     def remote_interruptCommand(self, stepId, why):
@@ -147,120 +139,76 @@
             return
         self.command.interrupt()
 
+
     def stopCommand(self):
+        """Make any currently-running command die, with no further status
+        output. This is used when the buildslave is shutting down or the
+        connection to the master has been lost. Interrupt the command,
+        silence it, and then forget about it."""
         if not self.command:
             return
-        self.command.running = False
-        if not self.command.interrupted:
-            self.command.interrupt()
-        
+        log.msg("stopCommand: halting current command %s" % self.command)
+        self.command.running = False # shut up!
+        self.command.interrupt() # die!
+        self.command = None # forget you!
 
-    # these two are fired by the Deferred attached to each Command
-    def commandComplete(self, dummy):
-        if not self.running:
-            return
-        self.sendComplete()
-    def commandFailed(self, why):
-        if not self.running:
-            return
-        log.msg("commandFailed")
-        log.err(why)
-        self.sendComplete(why)
 
     # sendUpdate is invoked by the Commands we spawn
-    def sendUpdate(self, data=None):
+    def sendUpdate(self, data):
         """This sends the status update to the master-side BuildStep object,
         giving it a sequence number in the process. It adds the update to
         a queue, and asks the master to acknowledge the update so it can be
         removed from that queue."""
         if not self.running:
+            # .running comes from service.Service, and says whether the
+            # service is running or not. If we aren't running, don't send any
+            # status messages.
             return
-        self.updateNum += 1
-        update = [data, self.updateNum]
-        #log.msg("sendUpdate", update)
-        self.updateQueue.append(update)
-        if self.remoteStep: # ?? send to Builder or BuildStep?
+        # the update[1]=0 comes from the leftover 'updateNum', which the
+        # master still expects to receive. Provide it to avoid significant
+        # interoperability issues between new slaves and old masters.
+        if self.remoteStep:
+            update = [data, 0]
             updates = [update]
             d = self.remoteStep.callRemote("update", updates)
             d.addCallback(self.ackUpdate)
             d.addErrback(self._ackFailed, "SlaveBuilder.sendUpdate")
 
-    # these are utility routines used by sendStatus and commandComplete
-
-    def dummy(self, value):
+    def ackUpdate(self, acknum):
+        # TODO: update the "last activity" timer
         pass
 
-    def ackUpdate(self, acknum):
-        """Normally, the master responds to remote_update by returning the
-        update number of the highest contiguous update received. That number
-        comes back to this routine, which removes the acknowledged updates
-        from the queue."""
-        # XXX: revamp this, I think it needs a retransmission timeout to
-        # deal with sendAllUpdates that don't all get acknowledged. Might be
-        # ok, though.
-        unacked = []
-        for update in self.updateQueue:
-            (data, updatenum) = update
-            if updatenum > acknum:
-                unacked.append(update)
-        self.updateQueue = unacked
-        # also, if the terminal status message (resulting from
-        # commandComplete or commandFailed) is acked, we can finally get rid
-        # of the command by clearing .stepRef and .command. We have to do
-        # this, otherwise we'll think we're still running the command and
-        # won't be able to answer remote_reattach correctly. XXX: is that
-        # true?
+    def ackComplete(self, dummy):
+        # TODO: update the "last activity" timer
+        pass
 
     def _ackFailed(self, why, where):
         log.msg("SlaveBuilder._ackFailed:", where)
         #log.err(why) # we don't really care
 
-    def sendAllUpdates(self):
-        """This is called after reattachment to send all queued updates."""
-        if self.updateQueue and self.remoteStep:
-            d = self.remoteStep.callRemote("update", self.updateQueue)
-            d.addCallback(self.ackUpdate)
-            d.addErrback(self._ackFailed, "SlaveBuilder.sendAllUpdates")
 
-    def sendComplete(self, failure=None):
-        # failure, if present, is a failure.Failure. To send it across the
-        # wire, we must turn it into a pb.CopyableFailure.
+    # this is fired by the Deferred attached to each Command
+    def commandComplete(self, failure):
         if failure:
+            log.msg("SlaveBuilder.commandFailed", self.command)
+            log.err(why)
+            # failure, if present, is a failure.Failure. To send it across
+            # the wire, we must turn it into a pb.CopyableFailure.
             failure = pb.CopyableFailure(failure)
             failure.unsafeTracebacks = True
-        self.complete = [failure]
+        else:
+            # failure is None
+            log.msg("SlaveBuilder.commandComplete", self.command)
+        self.command = None
+        if not self.running:
+            return
         if self.remoteStep:
+            self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
             d = self.remoteStep.callRemote("complete", failure)
             d.addCallback(self.ackComplete)
             d.addErrback(self._ackFailed, "sendComplete")
+            self.remoteStep = None
 
-    def ackComplete(self, dummy):
-        # this is the call that finally finishes the step
-        self.finishCommand()
-
-    def sendAllCompletes(self):
-        if self.complete and self.remoteStep:
-            d = self.remoteStep.callRemote("complete", self.complete[0])
-            d.addCallback(self.ackComplete)
-            d.addErrback(self._ackFailed, "sendAllCompletes")
-
-    def remote_reattach(self, stepref, stepId):
-        # were we executing something?
-        if not self.command:
-            raise NoCommandRunning
-        # were we executing the same thing that they think we were?
-        if self.command.stepId != stepId:
-            raise WrongCommandRunning
-        # send them our unacked status
-        self.remoteStep = stepref
-        self.sendAllUpdates()
-        self.sendAllCompletes()
-
-    def finishCommand(self):
-        log.msg("SlaveBuilder.finishCommand", self.command)
-        self.remoteStep.dontNotifyOnDisconnect(self.lostRemoteStep)
-        self.remoteStep = None
-        self.command = None
 
     def remote_shutdown(self):
         print "slave shutting down on command from master"

Index: commands.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/slave/commands.py,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -d -r1.17 -r1.18
--- commands.py	3 Dec 2004 22:54:52 -0000	1.17
+++ commands.py	4 Dec 2004 21:02:03 -0000	1.18
@@ -305,7 +305,7 @@
 
     If .running is False, the bot is shutting down (or has otherwise lost the
     connection to the master), and should not send any status messages. This
-    is taken care of in Command.sendStatus .
+    is checked in Command.sendStatus .
 
     """
 
@@ -329,7 +329,11 @@
         pass
         
     def start(self):
-        # should return a Deferred
+        """Start the command. self.running will be set just before this is
+        called. This method should return a Deferred that will fire when the
+        command has completed.
+
+        This method should be overridden by subclasses."""
         raise NotImplementedError, "You must implement this in a subclass"
 
     def sendStatus(self, status):
@@ -343,10 +347,12 @@
 
     def interrupt(self):
         """Override this in a subclass to allow commands to be interrupted.
-        May be called multiple times, use self.interrupted=True if this
-        matters."""
+        May be called multiple times, test and set self.interrupted=True if
+        this matters."""
         pass
 
+    # utility methods, mostly used by SlaveShellCommand and the like
+
     def _abandonOnFailure(self, rc):
         if type(rc) is not int:
             log.msg("weird, _abandonOnFailure was given rc=%s (%s)" % \
@@ -423,6 +429,8 @@
         return self.d
 
     def interrupt(self):
+        if self.interrupted:
+            return
         self.timer.cancel()
         self.timer = None
         self.interrupted = True
@@ -578,6 +586,7 @@
         command = ["rm", "-rf", d]
         c = ShellCommand(self.builder, command, self.builder.basedir,
                          sendRC=0, timeout=self.timeout)
+        self.command = c
         # sendRC=0 means the rm command will send stdout/stderr to the
         # master, but not the rc=0 when it finishes. That job is left to
         # _sendRC
@@ -595,6 +604,7 @@
         command = ['cp', '-r', fromdir, todir]
         c = ShellCommand(self.builder, command, self.builder.basedir,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         d = c.start()
         d.addCallback(self._abandonOnFailure)
         return d
@@ -609,6 +619,7 @@
         c = ShellCommand(self.builder, command, dir,
                          sendRC=False, timeout=self.timeout,
                          stdin=diff)
+        self.command = c
         d = c.start()
         d.addCallback(self._abandonOnFailure)
         return d
@@ -651,6 +662,7 @@
             c = ShellCommand(self.builder, command, d,
                              sendRC=False, timeout=self.timeout,
                              stdin=self.login+"\n")
+            self.command = c
             d = c.start()
             d.addCallback(self._abandonOnFailure)
             d.addCallback(self._didLogin)
@@ -671,6 +683,7 @@
             command += ['-D', self.revision]
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
     def doVCFull(self):
@@ -689,6 +702,7 @@
         command += [self.cvsmodule]
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
 registerSlaveCommand("cvs", CVS, cvs_ver)
@@ -720,6 +734,7 @@
         command = ['svn', 'update', '--revision', str(revision)]
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
     def doVCFull(self):
@@ -734,6 +749,7 @@
                        self.svnurl, self.srcdir]
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
 registerSlaveCommand("svn", SVN, cvs_ver)
@@ -765,6 +781,7 @@
         command = ['darcs', 'pull', '--all', '--verbose']
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
     def doVCFull(self):
@@ -776,6 +793,7 @@
                    self.repourl]
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
 registerSlaveCommand("darcs", Darcs, cvs_ver)
@@ -812,6 +830,7 @@
         command = ['tla', 'update']
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
     def doVCFull(self):
@@ -830,6 +849,7 @@
         c = ShellCommand(self.builder, command, self.builder.basedir,
                          sendRC=False, keepStdout=True,
                          timeout=self.timeout)
+        self.command = c
         d = c.start()
         d.addCallback(self._abandonOnFailure)
         d.addCallback(self._didRegister, c)
@@ -851,6 +871,7 @@
                    self.version, self.srcdir]
         c = ShellCommand(self.builder, command, self.builder.basedir,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         d = c.start()
         d.addCallback(self._abandonOnFailure)
         if self.buildconfig:
@@ -862,6 +883,7 @@
         command = ['tla', 'build-config', self.buildconfig]
         c = ShellCommand(self.builder, command, d,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         d = c.start()
         d.addCallback(self._abandonOnFailure)
         return d
@@ -893,6 +915,7 @@
         env = {'P4PORT': self.p4port}
         c = ShellCommand(self.builder, command, d, environ=env,
                          sendRC=False, timeout=self.timeout)
+        self.command = c
         return c.start()
 
     def doVCFull(self):





More information about the Commits mailing list