[Buildbot-commits] buildbot/buildbot/status html.py,1.62,1.63 builder.py,1.54,1.55

Brian Warner warner at users.sourceforge.net
Sun May 15 23:43:59 UTC 2005


Update of /cvsroot/buildbot/buildbot/buildbot/status
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv688/buildbot/status

Modified Files:
	html.py builder.py 
Log Message:
Revision: arch at buildbot.sf.net--2004/buildbot--dev--0--patch-171
Creator:  Brian Warner <warner at monolith.lothar.com>

handle large logfiles without consuming lots of memory

Merged from warner at monolith.lothar.com--2005 (patch 19-25)

Patches applied:

 * warner at monolith.lothar.com--2005/buildbot--dev--0--patch-19
   Merged from arch at buildbot.sf.net--2004 (patch 159-160)

 * warner at monolith.lothar.com--2005/buildbot--dev--0--patch-20
   Merged from arch at buildbot.sf.net--2004 (patch 161-166)

 * warner at monolith.lothar.com--2005/buildbot--dev--0--patch-21
   Merged from arch at buildbot.sf.net--2004 (patch 167)

 * warner at monolith.lothar.com--2005/buildbot--dev--0--patch-22
   Merged from arch at buildbot.sf.net--2004 (patch 168)

 * warner at monolith.lothar.com--2005/buildbot--dev--0--patch-23
   Merged from arch at buildbot.sf.net--2004 (patch 169)

 * warner at monolith.lothar.com--2005/buildbot--dev--0--patch-24
   Merged from arch at buildbot.sf.net--2004 (patch 170)

 * warner at monolith.lothar.com--2005/buildbot--dev--0--patch-25
   handle large log files without using lots of memory
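
For reference, the on-disk format introduced here is a plain stream of
netstrings, one per merged run of entries: "<length>:<payload>," where the
first byte of the payload is the channel digit (0=stdout, 1=stderr,
2=header) and the rest is the text. A minimal sketch of the writer and
scanner (illustrative only; the real code is LogFile.merge and
LogFileScanner in the diff below):

    from twisted.protocols import basic

    def append_entry(f, channel, text):
        # what LogFile.merge() does: seek to EOF, append one netstring
        f.seek(0, 2)
        f.write("%d:%d" % (1 + len(text), channel))
        f.write(text)
        f.write(",")

    class Scanner(basic.NetstringReceiver):
        # same shape as LogFileScanner: push raw file data through
        # dataReceived(), collect (channel, text) chunks via a callback
        def __init__(self, chunk_cb):
            self.chunk_cb = chunk_cb
        def stringReceived(self, line):
            self.chunk_cb((int(line[0]), line[1:]))

Since the scanner is an ordinary protocol, a reader can feed it the file
in fixed-size pieces (BUFFERSIZE=2048 below) and never hold more than one
buffer of log data in memory, which is the point of this change.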


Index: builder.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/status/builder.py,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -d -r1.54 -r1.55
--- builder.py	12 May 2005 21:55:57 -0000	1.54
+++ builder.py	15 May 2005 23:43:57 -0000	1.55
@@ -3,10 +3,11 @@
 from __future__ import generators
 
 from twisted.python import log, components
+from twisted.persisted import styles
 from twisted.internet import reactor, defer
 from twisted.protocols import basic
 
-import time, os.path, shutil, sys, re
+import time, os, os.path, shutil, sys, re
 try:
     import cPickle as pickle
 except ImportError:
@@ -42,55 +43,185 @@
 HEADER = 2
 ChunkTypes = ["stdout", "stderr", "header"]
 
-class LogFileStub:
-    __implements__ = interfaces.IStatusLogStub,
+class LogFileScanner(basic.NetstringReceiver):
+    def __init__(self, chunk_cb, channels=[]):
+        self.chunk_cb = chunk_cb
+        self.channels = channels
 
-    def __init__(self, logfile):
-        # stubs are created from a real LogFile
-        self.step = logfile.getStep()
-        self.name = logfile.getName()
+    def stringReceived(self, line):
+        channel = int(line[0])
+        if not self.channels or (channel in self.channels):
+            self.chunk_cb((channel, line[1:]))
 
-    def getName(self):
-        return self.name # set in BuildStepStatus.addLog
+class LogFileProducer:
+    """What's the plan?
 
-    def getStep(self):
-        return self.step
+    The LogFile has just one FD, used for both reading and writing.
+    Each time you add an entry, fd.seek to the end and then write.
 
-    def isFinished(self):
-        return True
+    Each reader (i.e. Producer) keeps track of their own offset. The reader
+    starts by seeking to the start of the logfile, and reading forwards.
+    Between each hunk of file they yield chunks, so they must remember their
+    offset before yielding and re-seek back to that offset before reading
+    more data. When their read() returns EOF, they're finished with the first
+    phase of the reading (everything that's already been written to disk).
 
-def getFullLog(stubLog):
-    logNumber = stubLog.step.getLogs().index(stubLog)
-    stubBuild = stubLog.step.build
-    stepNumber = stubBuild.getSteps().index(stubLog.step)
-    buildNumber = stubBuild.number
+    After EOF, the remaining data is entirely in the current entries list.
+    These entries are all of the same channel, so we can do one "".join and
+    obtain a single chunk to be sent to the listener. But since that involves
+    a yield, and more data might arrive after we give up control, we have to
+    subscribe them before yielding. We can't subscribe them any earlier,
+    otherwise they'd get data out of order.
 
-    fullBuild = stubBuild.builder.getFullBuildByNumber(buildNumber)
-    fullStep = fullBuild.getSteps()[stepNumber]
-    fullLog = fullStep.getLogs()[logNumber]
+    We're using a generator in the first place so that the listener can
+    throttle us, which means they're pulling. But the subscription means
+    we're pushing. Really we're a Producer. In the first phase we can be
+    either a PullProducer or a PushProducer. In the second phase we're only a
+    PushProducer.
 
-    return fullLog
+    So the client gives a LogFileConsumer to LogFile.subscribeConsumer. This
+    Consumer must have registerProducer(), unregisterProducer(), and
+    writeChunk(), and is just like a regular twisted.interfaces.IConsumer,
+    except that writeChunk() takes chunks (tuples of (channel,text)) instead
+    of the normal write() which takes just text. The LogFileConsumer is
+    allowed to call stopProducing, pauseProducing, and resumeProducing on the
+    producer instance it is given. """
 
-components.registerAdapter(getFullLog, LogFileStub, interfaces.IStatusLog)
+    subscribed = False
+    BUFFERSIZE = 2048
+
+    def __init__(self, logfile, consumer):
+        self.logfile = logfile
+        self.consumer = consumer
+        self.chunkGenerator = self.getChunks()
+        consumer.registerProducer(self, True)
+
+    def getChunks(self):
+        f = self.logfile.getFile()
+        offset = 0
+        chunks = []
+        p = LogFileScanner(chunks.append)
+        f.seek(offset)
+        data = f.read(self.BUFFERSIZE)
+        offset = f.tell()
+        while data:
+            p.dataReceived(data)
+            while chunks:
+                c = chunks.pop(0)
+                yield c
+            f.seek(offset)
+            data = f.read(self.BUFFERSIZE)
+            offset = f.tell()
+        del f
+
+        # now subscribe them to receive new entries
+        self.subscribed = True
+        self.logfile.watchers.append(self)
+        d = self.logfile.waitUntilFinished()
+
+        # then give them the not-yet-merged data
+        if self.logfile.runEntries:
+            channel = self.logfile.runEntries[0][0]
+            text = "".join([c[1] for c in self.logfile.runEntries])
+            yield (channel, text)
+
+        # now we've caught up to the present. Anything further will come from
+        # the logfile subscription. We add the callback *after* yielding the
+        # data from runEntries, because the logfile might have finished
+        # during the yield.
+        d.addCallback(self.logfileFinished)
+
+    def stopProducing(self):
+        # TODO: should we still call consumer.finish? probably not.
+        self.paused = True
+        self.consumer = None
+        self.done()
+
+    def done(self):
+        if self.chunkGenerator:
+            self.chunkGenerator = None # stop making chunks
+        if self.subscribed:
+            self.logfile.watchers.remove(self)
+            self.subscribed = False
+
+    def pauseProducing(self):
+        self.paused = True
+
+    def resumeProducing(self):
+        self.paused = False
+        try:
+            while not self.paused:
+                chunk = self.chunkGenerator.next()
+                self.consumer.writeChunk(chunk)
+                # we exit this when the consumer says to stop, or we run out
+                # of chunks
+        except StopIteration:
+            # if the generator finished, it will have released the file
+            self.chunkGenerator = None
+        # now everything goes through the subscription, and they don't get to
+        # pause anymore
+
+    def logChunk(self, build, step, logfile, channel, chunk):
+        if self.consumer:
+            self.consumer.writeChunk((channel, chunk))
+
+    def logfileFinished(self, logfile):
+        self.done()
+        if self.consumer:
+            self.consumer.unregisterProducer()
+            self.consumer.finish()
+            self.consumer = None
 
 class LogFile:
-    __implements__ = interfaces.IStatusLog, interfaces.IStatusLogStub
+    """A LogFile keeps all of its contents on disk, in a non-pickle format to
+    which new entries can easily be appended. The file on disk has a name
+    like 12-log-compile-output, under the Builder's directory. The actual
+    filename is generated (before the LogFile is created) by
+    L{BuildStatus.generateLogfileName}.
+
+    @type  parent: L{BuildStepStatus}
+    @param parent: the Step that this log is a part of
+    @type  name: string
+    @param name: the name of this log, typically 'output'
+    @type  logfilename: string
+    @param logfilename: the Builder-relative pathname for the saved entries
+
+    Old LogFile pickles (which kept their contents in .entries) must be
+    upgraded. The L{BuilderStatus} is responsible for doing this, when it
+    loads the L{BuildStatus} into memory. The Build pickle is not modified,
+    so users who go from 0.6.5 back to 0.6.4 don't have to lose their
+    logs."""
+
+    __implements__ = interfaces.IStatusLog,
     finished = False
     length = 0
     progress = None
     chunkSize = 10*1000
     runLength = 0
     runEntries = [] # provided so old pickled builds will getChunks() ok
+    entries = None
+    BUFFERSIZE = 2048
+    filename = None # relative to the Builder's basedir
+    openfile = None
 
-    def __init__(self, parent):
+    def __init__(self, parent, name, logfilename):
         self.step = parent
+        self.name = name
+        self.filename = logfilename
+        assert not os.path.exists(self.getFilename())
+        self.openfile = open(self.getFilename(), "wt+")
+        self.runEntries = []
         self.watchers = []
         self.finishedWatchers = []
-        self.entries = []
-        self.runEntries = []
+
+    def getFilename(self):
+        return os.path.join(self.step.build.builder.basedir, self.filename)
+
+    def hasContents(self):
+        return os.path.exists(self.getFilename())
 
     def getName(self):
-        return self.name # set in BuildStepStatus.addLog
+        return self.name
 
     def getStep(self):
         return self.step
@@ -105,13 +236,77 @@
             self.finishedWatchers.append(d)
         return d
 
+    def getFile(self):
+        if self.openfile:
+            # this is the filehandle we're using to write to the log, so
+            # don't close it!
+            return self.openfile
+        # otherwise they get their own read-only handle
+        return open(self.getFilename(), "rt")
+
     def getText(self):
-        return "".join([e[1] for e in self.entries + self.runEntries
-                        if e[0] in (STDOUT,STDERR)])
+        # this produces one ginormous string
+        return "".join(self.getChunks([STDOUT, STDERR], onlyText=True))
+
     def getTextWithHeaders(self):
-        return "".join([e[1] for e in self.entries + self.runEntries])
-    def getChunks(self):
-        return self.entries + self.runEntries
+        return "".join(self.getChunks(onlyText=True))
+
+    def getChunks(self, channels=[], onlyText=False):
+        # generate chunks for everything that was logged at the time we were
+        # first called, so remember how long the file was when we started.
+        # Don't read beyond that point. The current contents of
+        # self.runEntries will follow.
+
+        # this returns an iterator, which means arbitrary things could happen
+        # while we're yielding. This will faithfully deliver the log as it
+        # existed when it was started, and not return anything after that
+        # point. To use this in subscribe(catchup=True) without missing any
+        # data, you must ensure that nothing will be added to the log during
+        # yield() calls.
+
+        f = self.getFile()
+        offset = 0
+        f.seek(0, 2)
+        remaining = f.tell()
+
+        leftover = None
+        if self.runEntries and (not channels or
+                                (self.runEntries[0][0] in channels)):
+            leftover = (self.runEntries[0][0],
+                        "".join([c[1] for c in self.runEntries]))
+
+        # freeze the state of the LogFile by passing a lot of parameters into
+        # a generator
+        return self._generateChunks(f, offset, remaining, leftover,
+                                    channels, onlyText)
+
+    def _generateChunks(self, f, offset, remaining, leftover,
+                        channels, onlyText):
+        chunks = []
+        p = LogFileScanner(chunks.append, channels)
+        f.seek(offset)
+        data = f.read(min(remaining, self.BUFFERSIZE))
+        remaining -= len(data)
+        offset = f.tell()
+        while data:
+            p.dataReceived(data)
+            while chunks:
+                channel, text = chunks.pop(0)
+                if onlyText:
+                    yield text
+                else:
+                    yield (channel, text)
+            f.seek(offset)
+            data = f.read(min(remaining, self.BUFFERSIZE))
+            remaining -= len(data)
+            offset = f.tell()
+        del f
+
+        if leftover:
+            if onlyText:
+                yield leftover[1]
+            else:
+                yield leftover
 
     def subscribe(self, receiver, catchup):
         if self.finished:
@@ -124,7 +319,12 @@
                                   channel, text)
 
     def unsubscribe(self, receiver):
-        self.watchers.remove(receiver)
+        if receiver in self.watchers:
+            self.watchers.remove(receiver)
+
+    def subscribeConsumer(self, consumer):
+        p = LogFileProducer(self, consumer)
+        p.resumeProducing()
 
     # interface used by the build steps to add things to the log
     def logProgressTo(self, progress, name):
@@ -138,7 +338,12 @@
             return
         channel = self.runEntries[0][0]
         text = "".join([c[1] for c in self.runEntries])
-        self.entries.append((channel, text))
+        assert channel < 10
+        f = self.openfile
+        f.seek(0, 2)
+        f.write("%d:%d" % (1 + len(text), channel))
+        f.write(text)
+        f.write(",")
         self.runEntries = []
         self.runLength = 0
 
@@ -168,6 +373,14 @@
 
     def finish(self):
         self.merge()
+        if self.openfile:
+            # we don't do an explicit close, because there might be readers
+            # sharing the filehandle. As soon as they stop reading, the
+            # filehandle will be released and automatically closed. We will
+            # do a sync, however, to make sure the log gets saved in case of
+            # a crash.
+            os.fsync(self.openfile.fileno())
+            del self.openfile
         self.finished = True
         watchers = self.finishedWatchers
         self.finishedWatchers = []
@@ -179,19 +392,19 @@
             del self.progressName
 
     # persistence stuff
-    def stubify(self):
-        return LogFileStub(self)
-
     def __getstate__(self):
         d = self.__dict__.copy()
         del d['step'] # filled in upon unpickling
         del d['watchers']
         del d['finishedWatchers']
+        d['entries'] = [] # let 0.6.4 tolerate the saved log. TODO: really?
         if d.has_key('finished'):
             del d['finished']
         if d.has_key('progress'):
             del d['progress']
             del d['progressName']
+        if d.has_key('openfile'):
+            del d['openfile']
         return d
 
     def __setstate__(self, d):
@@ -201,116 +414,18 @@
         # self.step must be filled in by our parent
         self.finished = True
 
-class OfflineLogFileScanner(basic.NetstringReceiver):
-    def __init__(self, chunk_cb, channels=[]):
-        self.chunk_cb = chunk_cb
-        self.channels = channels
-    def stringReceived(self, line):
-        channel = int(line[0])
-        if not self.channels or (channel in self.channels):
-            self.chunk_cb((channel, line[1:]))
-
-class OfflineLogFile(LogFile):
-    """An OfflineLogFile keeps all of its contents on disk, in a non-pickle
-    format that can be appended easily.
-
-    This behaves like LogFile for each individual entry. The difference is
-    that, when a run of entries are merged together, the merged chunk is
-    written out to disk instead of being stored in self.entries .
-
-    """
-    openfile = None
-
-    def __init__(self, parent, logfilename):
-        self.step = parent
-        self.watchers = []
-        self.finishedWatchers = []
-        self.runEntries = []
+    def upgrade(self, logfilename):
+        """Save our .entries to a new-style offline log file (if necessary),
+        and modify our in-memory representation to use it. The original
+        pickled LogFile (inside the pickled Build) won't be modified."""
         self.filename = logfilename
-
-    def openForWriting(self):
-        assert not self.finished, "you can't add to a finished log"
-        if not self.openfile:
-            f = open(self.step.build.getLogfileNamed(self.filename), "a")
-            self.openfile = f
-        return self.openfile
-
-    def getChunks(self, channels=[], onlyText=False):
-        if self.openfile:
-            # we must close it so we can read it properly. It will be opened
-            # again the next time we try to write to it.
-            self.openfile.close()
-            self.openfile = None
-        f = open(self.step.build.getLogfileNamed(self.filename), "r")
-        chunks = []
-        p = OfflineLogFileScanner(chunks.append, channels)
-        # TODO: if merge() is called while we're paused, we might miss some
-        # data.
-        data = f.read(2000)
-        while data:
-            p.dataReceived(data)
-            for c in chunks:
-                if onlyText:
-                    yield c[1]
-                else:
-                    yield c
-            chunks = []
-            data = f.read(2000)
-        f.close()
-        # TODO: if merge() is called while we're paused, we'll lose some data
-        for channel, text in self.runEntries:
-            if not channels or (channel in channels):
-                if onlyText:
-                    yield text
-                else:
-                    yield (channel, text)
-
-    def getText(self):
-        # this produces one ginormous string
-        return "".join(self.getChunks([STDOUT, STDERR], onlyText=True))
-
-    def getTextWithHeaders(self):
-        return "".join(self.getChunks(onlyText=True))
-
-    def subscribe(self, receiver, catchup):
-        if self.finished:
-            return
-        self.watchers.append(receiver)
-        if catchup:
-            for channel,chunk in self.getChunks():
-                receiver.logChunk(self.step.build, self.step, self,
-                                  channel, chunk)
-
-    def merge(self):
-        # merge all .runEntries (which are all of the same type) into a
-        # single chunk for .entries
-        if not self.runEntries:
-            return
-        channel = self.runEntries[0][0]
-        text = "".join([c[1] for c in self.runEntries])
-        assert channel < 10
-        f = self.openForWriting()
-        f.write("%d:%d" % (1 + len(text), channel))
-        f.write(text)
-        f.write(",")
-        self.runEntries = []
-        self.runLength = 0
-
-    def finish(self):
-        self.merge()
-        if self.openfile:
-            self.openfile.close()
-            self.openfile = None
-        LogFile.finish(self)
-
-    def __getstate__(self):
-        d = LogFile.__getstate__(self)
-        if d.has_key('openfile'):
-            del d['openfile']
-        return d
-
-    def stubify(self):
-        return self
+        if not os.path.exists(self.getFilename()):
+            self.openfile = open(self.getFilename(), "wt")
+            self.finished = False
+            for channel,text in self.entries:
+                self.addEntry(channel, text)
+            self.finish() # releases self.openfile, which will be closed
+        del self.entries
 
 
 class HTMLLogFile:
@@ -345,10 +460,6 @@
     def finish(self):
         pass
 
-    # persistence
-    def stubify(self):
-        return self
-
     def __getstate__(self):
         d = self.__dict__.copy()
         del d['step']
@@ -415,7 +526,7 @@
     @cvar text: list of short texts that describe the command and its status
     @type text2: list of strings
     @cvar text2: list of short texts added to the overall build description
-    @type logs: dict of string -> L{buildbot.status.builder.OfflineLogFile}
+    @type logs: dict of string -> L{buildbot.status.builder.LogFile}
     @ivar logs: logs of steps
     """
     # note that these are created when the Build is set up, before each
@@ -563,9 +674,8 @@
 
     def addLog(self, name):
         assert self.started # addLog before stepStarted won't notify watchers
-        logfilename = self.build.getLogfileName(self.name, name)
-        log = OfflineLogFile(self, logfilename)
-        log.name = name
+        logfilename = self.build.generateLogfileName(self.name, name)
+        log = LogFile(self, name, logfilename)
         self.logs.append(log)
         for w in self.watchers:
             receiver = w.logStarted(self.build, self, log)
@@ -604,7 +714,6 @@
         self.results = results
         for loog in self.logs:
             if not loog.isFinished():
-                log.msg("log '%s' not closed" % loog.getName())
                 loog.finish()
 
         for r in self.updates.keys():
@@ -617,17 +726,8 @@
         for w in watchers:
             w.callback(self)
 
-    # methods to be invoked by our parent IBuildStatus
-
-    def pruneLogs(self):
-        self.logs = []
-
     # persistence
 
-    def stubify(self):
-        self.logs = [loog.stubify() for loog in self.logs]
-        return self
-
     def __getstate__(self):
         d = self.__dict__.copy()
         del d['build'] # filled in when loading
@@ -899,11 +999,22 @@
 
     # persistence stuff
 
-    def getLogfileName(self, stepname, logname):
-        """Return an relative path where this logfile's contents can be
-        stored."""
-        starting_filename = stepname + logname
-        starting_filename = re.sub(r'[^\w\.]', '_', starting_filename)
+    def generateLogfileName(self, stepname, logname):
+        """Return a filename (relative to the Builder's base directory) where
+        the logfile's contents can be stored uniquely.
+
+        The base filename is made by combining our build number, the Step's
+        name, and the log's name, then removing unsuitable characters. The
+        filename is then made unique by appending _0, _1, etc, until it does
+        not collide with any other logfile.
+
+        These files are kept in the Builder's basedir (rather than a
+        per-Build subdirectory) because that makes cleanup easier: cron and
+        find will help get rid of the old logs, but the empty directories are
+        more of a hassle to remove."""
+
+        starting_filename = "%d-log-%s-%s" % (self.number, stepname, logname)
+        starting_filename = re.sub(r'[^\w\.\-]', '_', starting_filename)
         # now make it unique
         unique_counter = 0
         filename = starting_filename
@@ -915,16 +1026,6 @@
             unique_counter += 1
         return filename
 
-    def getLogfileNamed(self, logname):
-        """Return the absolute path of the logfile with the given name"""
-        filename = os.path.join(self.builder.basedir,
-                                "%d-%s" % (self.number, logname))
-        return filename
-
-    def stubify(self):
-        self.steps = [step.stubify() for step in self.steps]
-        return self
-
     def __getstate__(self):
         d = self.__dict__.copy()
         # for now, a serialized Build is always "finished". We will never
@@ -950,6 +1051,21 @@
         self.updates = {}
         self.finishedWatchers = []
 
+    def upgradeLogfiles(self):
+        # upgrade any LogFiles that need it. This must occur after we've been
+        # attached to our Builder, and after we know about all LogFiles of
+        # all Steps (to get the filenames right).
+        assert self.builder
+        for s in self.steps:
+            for l in s.getLogs():
+                if l.filename:
+                    pass # new-style, log contents are on disk
+                else:
+                    logfilename = self.generateLogfileName(s.name, l.name)
+                    # let the logfile update its .filename pointer,
+                    # transferring its contents onto disk if necessary
+                    l.upgrade(logfilename)
+
     def saveYourself(self):
         filename = os.path.join(self.builder.basedir, "%d" % self.number)
         if os.path.isdir(filename):
@@ -994,11 +1110,13 @@
 
     __implements__ = interfaces.IBuilderStatus,
 
-    buildHorizon = 100 # prune builds beyond this
-    stubBuildCacheSize = 30
-    fullBuildCacheSize = 2
-    stepHorizon = 50 # prune steps in builds beyond this
-    logHorizon = 20 # prune logs in builds beyond this
+    # these limit the amount of memory we consume, as well as the size of the
+    # main Builder pickle. The Build and LogFile pickles on disk must be
+    # handled separately.
+    buildCacheSize = 30
+    buildHorizon = 100 # forget builds beyond this
+    stepHorizon = 50 # forget steps in builds beyond this
+
     slavename = None
     category = None
     currentBuild = None
@@ -1020,16 +1138,14 @@
         self.nextBuild = None
         self.eta = None
         self.watchers = []
-        self.fullBuildCache = [] # TODO: age builds out of the cache
-        self.stubBuildCache = []
+        self.buildCache = [] # TODO: age builds out of the cache
 
     # persistence
 
     def __getstate__(self):
         d = self.__dict__.copy()
         d['watchers'] = []
-        del d['fullBuildCache']
-        del d['stubBuildCache']
+        del d['buildCache']
         if self.currentBuild:
             self.currentBuild.saveYourself()
             # TODO: push a 'hey, build was interrupted' event
@@ -1041,14 +1157,13 @@
 
     def __setstate__(self, d):
         self.__dict__ = d
-        self.fullBuildCache = []
-        self.stubBuildCache = []
+        self.buildCache = []
         self.watchers = []
         # self.basedir must be filled in by our parent
         # self.status must be filled in by our parent
 
     def saveYourself(self):
-        for b in self.fullBuildCache:
+        for b in self.buildCache:
             if not b.isFinished:
                 # interrupted build, need to save it anyway.
                 # BuildStatus.saveYourself will mark it as interrupted.
@@ -1069,71 +1184,26 @@
 
     # build cache management
 
-    def addFullBuildToCache(self, build):
-        if build in self.fullBuildCache:
-            return
-        self.fullBuildCache.append(build)
-        # there might be a stripped version of this one in the stub cache
-        for b in self.stubBuildCache:
-            if b.number == build.number:
-                self.stubBuildCache.remove(b)
-                break
-        while len(self.fullBuildCache) > self.fullBuildCacheSize:
-            old = self.fullBuildCache.pop(0)
-            if not old.finished:
-                log.msg("BuilderStatus.addFullBuildToCache: "
-                        "weird, old build %d isn't finished" % old.number)
-                # this can happen if a lot of people hit logfiles of
-                # different builds at the same time: all the corresponding
-                # builds must be pulled fully into memory, and a
-                # currently-active one could be pushed out. We need to keep
-                # it from being stripped until it finishes changing its logs
-                self.fullBuildCache.append(old)
-                break
-            else:
-                old = old.stubify()
-                self.addStubBuildToCache(old)
-
-    def addStubBuildToCache(self, build):
-        if build in self.stubBuildCache:
+    def addBuildToCache(self, build):
+        if build in self.buildCache:
             return
-        self.stubBuildCache.append(build)
-        if len(self.stubBuildCache) > self.stubBuildCacheSize:
-            self.stubBuildCache.pop(0)
-
-    def getStubBuildByNumber(self, number):
-        for build in self.fullBuildCache:
-            if build.number == number:
-                # full builds are ok too
-                return build
-        for build in self.stubBuildCache:
-            if build.number == number:
-                return build
-        # need to load it from disk
-        filename = os.path.join(self.basedir, "%d" % number)
-        # TODO: consider using "%d.stub" to avoid loading in all the build's
-        # logs when we're going to throw them out in a moment. requires two
-        # separate files on disk, though.
-        try:
-            build = pickle.load(open(filename, "r"))
-            build.builder = self
-            build = build.stubify()
-            self.addStubBuildToCache(build)
-            return build
-        except IOError:
-            raise IndexError("no such build %d" % number)
-        except EOFError:
-            raise IndexError("corrupted build pickle %d" % number)
+        self.buildCache.append(build)
+        while len(self.buildCache) > self.buildCacheSize:
+            self.buildCache.pop(0)
 
-    def getFullBuildByNumber(self, number):
-        for build in self.fullBuildCache:
+    def getBuildByNumber(self, number):
+        if self.currentBuild and self.currentBuild.number == number:
+            return self.currentBuild
+        for build in self.buildCache:
             if build.number == number:
                 return build
         filename = os.path.join(self.basedir, "%d" % number)
         try:
             build = pickle.load(open(filename, "r"))
             build.builder = self
-            self.addFullBuildToCache(build)
+            # handle LogFiles from after 0.5.0 and before 0.6.5
+            build.upgradeLogfiles()
+            self.addBuildToCache(build)
             return build
         except IOError:
             raise IndexError("no such build %d" % number)
@@ -1147,9 +1217,6 @@
         # then prune steps in builds past the step horizon
         for b in self.builds[0:-self.stepHorizon]:
             b.pruneSteps()
-        # then prune logs in steps in builds past the log horizon
-        for b in self.builds[0:-self.logHorizon]:
-            b.pruneLogs()
 
     def getETA(self):
         eta = self.ETA # absolute time, set by currentlyWaiting
@@ -1186,7 +1253,7 @@
             return None
 
         try:
-            return self.getStubBuildByNumber(number)
+            return self.getBuildByNumber(number)
         except IndexError:
             return None
 
@@ -1336,7 +1403,7 @@
         assert s.number == self.nextBuildNumber - 1
         self.currentBuild = s
         self.currentBigState = "building"
-        self.addFullBuildToCache(self.currentBuild)
+        self.addBuildToCache(self.currentBuild)
         self.publishState()
 
         # now that the BuildStatus is prepared to answer queries, we can
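
To show the shape of the new read API before moving on to html.py:
getChunks() now returns a generator rather than a list, so callers
iterate instead of indexing into .entries. A hypothetical driver (the
variable names are made up, not from the patch):

    # dump one step's stdout/stderr to a file, streaming from disk
    log = step.getLogs()[0]
    out = open("/tmp/compile.txt", "w")
    for text in log.getChunks([STDOUT, STDERR], onlyText=True):
        out.write(text)   # at most ~BUFFERSIZE bytes of log held at a time
    out.close()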

Index: html.py
===================================================================
RCS file: /cvsroot/buildbot/buildbot/buildbot/status/html.py,v
retrieving revision 1.62
retrieving revision 1.63
diff -u -d -r1.62 -r1.63
--- html.py	12 May 2005 21:55:57 -0000	1.62
+++ html.py	15 May 2005 23:43:57 -0000	1.63
@@ -204,9 +204,13 @@
             data += ("<h2>Logs</h2>\n"
                      "<ul>\n")
             for num in range(len(logs)):
-                data += '<li><a href="%s">%s</a></li>\n' % \
-                        (urllib.quote(request.childLink("%d" % num)),
-                         html.escape(logs[num].getName()))
+                if logs[num].hasContents():
+                    data += '<li><a href="%s">%s</a></li>\n' % \
+                            (urllib.quote(request.childLink("%d" % num)),
+                             html.escape(logs[num].getName()))
+                else:
+                    data += ('<li>%s</li>\n' %
+                             html.escape(logs[num].getName()))
             data += "</ul>\n"
 
         return data
@@ -215,7 +219,9 @@
         logname = path
         try:
             log = self.step.getLogs()[int(logname)]
-            return IHTMLLog(interfaces.IStatusLog(log))
+            if log.hasContents():
+                return IHTMLLog(interfaces.IStatusLog(log))
+            return NoResource("Empty Log '%s'" % logname)
         except (IndexError, ValueError):
             return NoResource("No such Log '%s'" % logname)
 
@@ -528,6 +534,25 @@
 </style>
 """
 
+class ChunkConsumer:
+    __implements__ = interfaces.IStatusLogConsumer,
+    def __init__(self, original, textlog):
+        self.original = original
+        self.textlog = textlog
+    def registerProducer(self, producer, streaming):
+        self.producer = producer
+        self.original.registerProducer(producer, streaming)
+    def unregisterProducer(self):
+        self.original.unregisterProducer()
+    def writeChunk(self, chunk):
+        formatted = self.textlog.content([chunk])
+        try:
+            self.original.write(formatted)
+        except pb.DeadReferenceError:
+            self.producer.stopProducing()
+    def finish(self):
+        self.textlog.finished()
+
 class TextLog(Resource):
     # a new instance of this Resource is created for each client who views
     # it, so we can afford to track the request in the Resource.
@@ -583,31 +608,6 @@
         request.setHeader("content-length", self.original.length)
         return ''
 
-    def resumeProducing(self):
-        try:
-            chunk = self.chunkGenerator.next()
-            data = self.content([chunk])
-            if data:
-                self.req.write(data)
-            return
-        except StopIteration:
-            pass
-        self.req.unregisterProducer()
-        # now subscribe to anything that might happen later
-        self.original.subscribe(self, False)
-        self.subscribed = True
-        d = self.original.waitUntilFinished()
-        d.addCallback(self.finished)
-
-    # TODO: under heavy load (a rogue web crawler hammering all the build log
-    # pages), this method gets called, and we don't implement it, which is
-    # bad.
-    #def pauseProducing(self):
-    #    pass
-
-    def stopProducing(self):
-        pass
-
     def render_GET(self, req):
         self.req = req
 
@@ -619,28 +619,10 @@
         if not self.asText:
             req.write(self.htmlHeader(req))
 
-        self.chunkGenerator = self.original.getChunks()
-        req.registerProducer(self, False)
-        d = req.notifyFinish()
-        d.addErrback(self.stop)
+        self.original.subscribeConsumer(ChunkConsumer(req, self))
         return server.NOT_DONE_YET
 
-    def stop(self, why):
-        if self.subscribed:
-            self.original.unsubscribe(self)
-            self.subscribed = False
-        self.req.unregisterProducer()
-        # our .finished callback may still be fired
-        self.req = None
-
-    def logChunk(self, build, step, log, channel, text):
-        output = self.content([(channel, text)])
-        try:
-            self.req.write(output)
-        except pb.DeadReferenceError:
-            log.unsubscribe(self)
-
-    def finished(self, log):
+    def finished(self):
         if not self.req:
             return
         try:
@@ -745,8 +727,11 @@
         logs = self.original.getLogs()
         for num in range(len(logs)):
             name = logs[num].getName()
-            url = urllib.quote("%s/%d" % (urlbase, num))
-            text.append("<a href=\"%s\">%s</a>" % (url, html.escape(name)))
+            if logs[num].hasContents():
+                url = urllib.quote("%s/%d" % (urlbase, num))
+                text.append("<a href=\"%s\">%s</a>" % (url, html.escape(name)))
+            else:
+                text.append(html.escape(name))
         color = self.original.getColor()
         class_ = "BuildStep " + build_get_class(self.original)
         return Box(text, color, class_=class_)
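
The html.py half replaces TextLog's hand-rolled producer logic with
LogFile.subscribeConsumer(). Any object providing the four methods below
can act as a consumer; this toy example (illustrative only, not part of
the patch) dumps a log to stdout the same way ChunkConsumer renders
chunks into a web request:

    import sys

    class PrintingConsumer:
        def registerProducer(self, producer, streaming):
            # LogFileProducer registers itself with streaming=True (a
            # push producer); we may pause/resume/stopProducing it
            self.producer = producer
        def unregisterProducer(self):
            self.producer = None
        def writeChunk(self, chunk):
            channel, text = chunk   # channel is STDOUT/STDERR/HEADER
            sys.stdout.write(text)
        def finish(self):
            sys.stdout.write("\n-- log finished --\n")

    # logfile.subscribeConsumer(PrintingConsumer()) replays everything
    # already on disk, then streams new entries until the log finishes.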




