[Buildbot-commits] buildbot/buildbot/slave commands.py,1.35,1.36

Brian Warner warner at users.sourceforge.net
Tue Jul 19 01:55:24 UTC 2005


Update of /cvsroot/buildbot/buildbot/buildbot/slave
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10106/buildbot/slave

Modified Files:
	commands.py 
Log Message:
Revision: arch at buildbot.sf.net--2004/buildbot--dev--0--patch-234
Creator:  Brian Warner <warner at monolith.lothar.com>

overhaul ShellCommand timeout/interrupt/cleanup, add tests

	* buildbot/slave/commands.py (ShellCommand): overhaul
	error-handling code, to try and make timeout/interrupt work
	properly, and make win32 happier
	* buildbot/test/test_slavecommand.py: clean up, stop using
	reactor.iterate, add tests for timeout and interrupt
	* buildbot/test/sleep.py: utility for a new timeout test

	* buildbot/twcompat.py: copy over twisted 1.3/2.0 compatibility
	code from the local-usebranches branch


Index: commands.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/slave/commands.py,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -d -r1.35 -r1.36
--- commands.py	17 May 2005 22:19:18 -0000	1.35
+++ commands.py	19 Jul 2005 01:55:21 -0000	1.36
@@ -64,6 +64,12 @@
     def connectionMade(self):
         if self.debug:
             log.msg("ShellCommandPP.connectionMade")
+        if not self.command.process:
+            if self.debug:
+                log.msg(" assigning self.command.process: %s" %
+                        (self.transport,))
+            self.command.process = self.transport
+
         if self.command.stdin:
             if self.debug: log.msg(" writing to stdin")
             self.transport.write(self.command.stdin)
@@ -107,6 +113,8 @@
     # child shell.
 
     notreally = False
+    BACKUP_TIMEOUT = 5
+    KILL = "KILL"
 
     def __init__(self, builder, command,
                  workdir, environ=None,
@@ -133,7 +141,6 @@
         self.stdin = stdin
         self.timeout = timeout
         self.timer = None
-        self.interrupted = 0
         self.keepStdout = keepStdout
 
         # usePTY=True is a convenience for cleaning up all children and
@@ -219,10 +226,23 @@
         log.msg(" " + msg)
         self.sendStatus({'header': msg+"\n"})
 
-        self.process = reactor.spawnProcess(self.pp, argv[0], argv,
-                                            self.environ,
-                                            self.workdir,
-                                            usePTY=self.usePTY)
+        # win32eventreactor's spawnProcess (under twisted <= 2.0.1) returns
+        # None, as opposed to all the posixbase-derived reactors (which
+        # return the new Process object). This is a nuisance. We can make up
+        # for it by having the ProcessProtocol give us their .transport
+        # attribute after they get one. I'd prefer to get it from
+        # spawnProcess because I'm concerned about returning from this method
+        # without having a valid self.process to work with. (if kill() were
+        # called right after we return, but somehow before connectionMade
+        # were called, then kill() would blow up).
+        self.process = None
+        p = reactor.spawnProcess(self.pp, argv[0], argv,
+                                 self.environ,
+                                 self.workdir,
+                                 usePTY=self.usePTY)
+        # connectionMade might have been called during spawnProcess
+        if not self.process:
+            self.process = p
 
         # connectionMade also closes stdin as long as we're not using a PTY.
         # This is intended to kill off inappropriately interactive commands
@@ -230,25 +250,20 @@
         # enhanced to allow the same childFDs argument that Process takes,
         # which would let us connect stdin to /dev/null .
 
-
         if self.timeout:
             self.timer = reactor.callLater(self.timeout, self.doTimeout)
 
     def addStdout(self, data):
-        if self.interrupted: return
         if self.sendStdout: self.sendStatus({'stdout': data})
         if self.keepStdout: self.stdout += data
         if self.timer: self.timer.reset(self.timeout)
 
     def addStderr(self, data):
-        if self.interrupted: return
         if self.sendStderr: self.sendStatus({'stderr': data})
         if self.timer: self.timer.reset(self.timeout)
 
     def finished(self, sig, rc):
         log.msg("command finished with signal %s, exit code %s" % (sig,rc))
-        if self.interrupted:
-            return
         if sig is not None:
             rc = -1
         if self.sendRC:
@@ -259,22 +274,38 @@
         if self.timer:
             self.timer.cancel()
             self.timer = None
-        self.deferred.callback(rc)
+        d = self.deferred
+        self.deferred = None
+        if d:
+            d.callback(rc)
+        else:
+            log.msg("Hey, command %s finished twice" % self)
+
+    def failed(self, why):
+        log.msg("ShellCommand.failed: command failed: %s" % (why,))
+        if self.timer:
+            self.timer.cancel()
+            self.timer = None
+        d = self.deferred
+        self.deferred = None
+        if d:
+            d.errback(why)
+        else:
+            log.msg("Hey, command %s finished twice" % self)
 
     def doTimeout(self):
+        self.timer = None
         msg = "command timed out: %d seconds without output" % self.timeout
         self.kill(msg)
 
     def kill(self, msg):
-        if not self.process:
-            msg += ", but there is no current process, finishing anyway"
-            log.msg(msg)
-            self.sendStatus({'header': "\n" + msg + "\n"})
-            if self.pp:
-                self.pp.command = None
-            self.commandFailed(CommandInterrupted("no process to interrupt"))
-            return
-        msg += ", killing pid %d" % self.process.pid
+        # This may be called by the timeout, or when the user has decided to
+        # abort this build.
+        if self.timer:
+            self.timer.cancel()
+            self.timer = None
+        if hasattr(self.process, "pid"):
+            msg += ", killing pid %d" % self.process.pid
         log.msg(msg)
         self.sendStatus({'header': "\n" + msg + "\n"})
 
@@ -285,36 +316,65 @@
                 # Groups are ideal for this, but that requires
                 # spawnProcess(usePTY=1). Try both ways in case process was
                 # not started that way.
-                log.msg("trying os.kill(-pid, signal.SIGKILL)")
-                os.kill(-self.process.pid, signal.SIGKILL)
-                log.msg(" successful")
-                hit = 1
+
+                # the test suite sets self.KILL=None to tell us we should
+                # only pretend to kill the child. This lets us test the
+                # backup timer.
+
+                sig = None
+                if self.KILL is not None:
+                    sig = getattr(signal, "SIG"+ self.KILL, None)
+
+                if self.KILL == None:
+                    log.msg("self.KILL==None, only pretending to kill child")
+                elif sig is None:
+                    log.msg("signal module is missing SIG%s" % self.KILL)
+                elif not hasattr(os, "kill"):
+                    log.msg("os module is missing the 'kill' function")
+                else:
+                    log.msg("trying os.kill(-pid, %d)" % (sig,))
+                    os.kill(-self.process.pid, sig)
+                    log.msg(" signal %s sent successfully" % sig)
+                    hit = 1
             except OSError:
                 # probably no-such-process, maybe because there is no process
                 # group
                 pass
         if not hit:
             try:
-                log.msg("trying process.signalProcess('KILL')")
-                self.process.signalProcess('KILL')
-                log.msg(" successful")
-                hit = 1
+                if self.KILL is None:
+                    log.msg("self.KILL==None, only pretending to kill child")
+                else:
+                    log.msg("trying process.signalProcess('KILL')")
+                    self.process.signalProcess(self.KILL)
+                    log.msg(" signal %s sent successfully" % (self.KILL,))
+                    hit = 1
             except OSError:
                 # could be no-such-process, because they finished very recently
                 pass
         if not hit:
             log.msg("signalProcess/os.kill failed both times")
-        # finished ought to be called momentarily
-        self.timer = reactor.callLater(5, self.doBackupTimeout) # just in case
+
+        if runtime.platformType == "posix":
+            # we only do this under posix because the win32eventreactor
+            # blocks here until the process has terminated, while closing
+            # stderr. This is weird.
+            self.pp.transport.loseConnection()
+
+        # finished ought to be called momentarily. Just in case it doesn't,
+        # set a timer which will abandon the command.
+        self.timer = reactor.callLater(self.BACKUP_TIMEOUT,
+                                       self.doBackupTimeout)
 
     def doBackupTimeout(self):
-        # we tried to kill the process, and it wouldn't die. Finish anyway.
-        self.sendStatus({'header': "SIGKILL failed to kill process\n"})
+        log.msg("we tried to kill the process, and it wouldn't die.."
+                " finish anyway")
         self.timer = None
-        self.pp.command = None # take away its voice
-        # note, if/when the command finally does complete, an exception will
-        # be raised as pp tries to send status through .command
-        self.commandFailed(TimeoutError("SIGKILL failed to kill process"))
+        self.sendStatus({'header': "SIGKILL failed to kill process\n"})
+        if self.sendRC:
+            self.sendStatus({'header': "using fake rc=-1\n"})
+            self.sendStatus({'rc': -1})
+        self.failed(TimeoutError("SIGKILL failed to kill process"))
 
 
 class Command:





More information about the Commits mailing list