[Buildbot-commits] [Buildbot] #1784: Buildmaster blocks
Buildbot
nobody at buildbot.net
Thu Feb 3 13:06:32 UTC 2011
#1784: Buildmaster blocks
---------------------+--------------------
Reporter: Schenker | Owner:
Type: defect | Status: new
Priority: major | Milestone: 0.8.4
Version: 0.8.0 | Resolution:
Keywords: |
---------------------+--------------------
Comment (by Schenker):
I ran strace on the master process. As I mentioned above, while the master
is frozen the web page cannot be opened and there is no activity in
twistd.log (I can tell because every 2 minutes the master checks SVN for
the latest revision and writes that to the log - while the master is
frozen, no such entries appear). But during that time strace still shows
output, including the SVN polling:
{{{
select(12, [3 5 6 8 9 11], [], [], {0, 687206}) = 0 (Timeout)
write(4, "2011-02-03 14:07:00+0200 [-] SVN"..., 47) = 47
pipe([12, 13]) = 0
pipe([14, 15]) = 0
pipe([16, 17]) = 0
clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x2ab3ffba5ff0) = 19642
close(12) = 0
fcntl(13, F_GETFL) = 0x1 (flags O_WRONLY)
fcntl(13, F_SETFL, O_WRONLY|O_NONBLOCK) = 0
fstat(13, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0
close(15) = 0
fcntl(14, F_GETFL) = 0 (flags O_RDONLY)
fcntl(14, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
close(17) = 0
fcntl(16, F_GETFL) = 0 (flags O_RDONLY)
fcntl(16, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
wait4(19642, 0x7fffb3a25b64, WNOHANG, NULL) = 0
select(17, [3 5 6 8 9 11 13 14 16], [], [], {0, 306986}) = 1 (in [14],
left {0, 294000})
read(14, "<?xml version=\"1.0\"?>\n<log>\n", 8192) = 28
select(17, [3 5 6 8 9 11 13 14 16], [], [], {0, 293722}) = 1 (in [14],
left {0, 232000})
read(14, "<logentry\n revision=\"37510\">\n<"..., 8192) = 1039
select(17, [3 5 6 8 9 11 13 14 16], [], [], {0, 230957}) = 1 (in [14],
left {0, 230957})
read(14, "<logentry\n revision=\"37501\">\n<"..., 8192) = 4016
select(17, [3 5 6 8 9 11 13 14 16], [], [], {0, 230740}) = 1 (in [14],
left {0, 230740})
read(14, "<logentry\n revision=\"37489\">\n<"..., 8192) = 1196
select(17, [3 5 6 8 9 11 13 14 16], [], [], {0, 230546}) = 3 (in [13 14
16], left {0, 228000})
--- SIGCHLD (Child exited) @ 0 (0) ---
write(10, "x", 1) = 1
rt_sigreturn(0x2) = 3
read(14, "", 8192) = 0
close(14) = 0
fcntl(13, F_GETFL) = 0x801 (flags
O_WRONLY|O_NONBLOCK)
fcntl(13, F_SETFL, O_WRONLY) = 0
close(13) = 0
read(16, "", 8192) = 0
close(16) = 0
wait4(19642, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], WNOHANG, NULL) =
19642
write(4, "2011-02-03 14:07:00+0200 [-] svn"..., 85) = 85
write(4, "2011-02-03 14:07:00+0200 [-] svn"..., 72) = 72
write(4, "2011-02-03 14:07:00+0200 [-] SVN"..., 56) = 56
write(4, "2011-02-03 14:07:00+0200 [-] _fi"..., 46) = 46
select(12, [3 5 6 8 9 11], [], [], {0, 218745}) = 1 (in [3], left {0,
218745})
read(3, "x", 8192) = 1
select(12, [3 5 6 8 9 11], [], [], {0, 218513}) = 0 (Timeout)
select(12, [3 5 6 8 9 11], [], [], {0, 17}) = 0 (Timeout)
select(12, [3 5 6 8 9 11], [], [], {0, 999948}) = 0 (Timeout)
}}}
So ... obviously the master is not entirely blocked ... but the result is
the same - about 20 minutes after the master stops responding, the slave is
detached and the build is interrupted with the following output:
{{{
[Failure instance: Traceback (failure with no frames): <class
'twisted.internet.error.ConnectionLost'>: Connection to the other side was
lost in a non-clean fashion.
]
}}}
--
Ticket URL: <http://trac.buildbot.net/ticket/1784#comment:6>
Buildbot <http://buildbot.net/>
Buildbot: build/test automation
More information about the Commits
mailing list