[Swift-commit] cog r3480
swift at ci.uchicago.edu
swift at ci.uchicago.edu
Thu Sep 27 20:25:11 CDT 2012
------------------------------------------------------------------------
r3480 | hategan | 2012-09-27 20:23:58 -0500 (Thu, 27 Sep 2012) | 1 line
send a remote log message before dying for easier troubleshooting
------------------------------------------------------------------------
Index: modules/provider-coaster/resources/worker.pl
===================================================================
--- modules/provider-coaster/resources/worker.pl (revision 3479)
+++ modules/provider-coaster/resources/worker.pl (working copy)
@@ -146,6 +146,7 @@
my @PROFILE_EVENTS = ();
my $ID = "-";
+my $CONNECTED = 0;
my $myhost=`hostname`;
$myhost =~ s/\s+$//;
@@ -395,8 +396,9 @@
}
}
if (!$success) {
- die "Failed to connect: $!";
+ dieNicely("Failed to connect: $!");
}
+ $CONNECTED = 1;
$LAST_HEARTBEAT = time();
}
@@ -485,7 +487,8 @@
$r = 0;
}
else {
- wlog(WARN, "Send failed: $!\n") and die "Send failed: $!";
+ $CONNECTED = 0;
+ dieNicely("Send failed: $!");
}
}
my $diff = sprintf("%.8f", time() - $start);
@@ -583,7 +586,7 @@
$$state{"index"} = $index + 1;
my $data = $$state{"data"};
if ($index > $#$data) {
- die "Index out of bounds";
+ dieNicely("Index out of bounds in nextArrayData");
}
return ($index >= $#$data ? FINAL_FLAG : 0, $$data[$index], CONTINUE);
}
@@ -743,8 +746,7 @@
my $lendata = length($data);
if ($lendata < 20) {
- wlog WARN, "Received faulty message (length < 20: $lendata)\n";
- die "Received faulty message (length < 20: $lendata)";
+ dieNicely("Received faulty message (length < 20: $lendata)");
}
my $tag = unpack("V", substr($data, 0, 4));
my $flg = unpack("V", substr($data, 4, 4));
@@ -755,8 +757,7 @@
my $chcsum = ($tag ^ $flg ^ $len);
if ($chcsum != $hcsum) {
- wlog WARN, "Header checksum failed. Computed checksum: $chcsum, checksum: $hcsum\n";
- return;
+ dieNicely("Header checksum failed. Computed checksum: $chcsum, checksum: $hcsum");
}
my $msg = "";
@@ -771,7 +772,7 @@
my $actuallen = length($msg);
wlog(TRACE, " IN: len=$len, actuallen=$actuallen, tag=$tag, flags=$flg, $msg\n");
if ($len != $actuallen) {
- wlog(WARN, "len != actuallen\n");
+ dieNicely("len != actuallen\n");
}
return ($tag, $flg, $msg);
}
@@ -903,7 +904,7 @@
$DATA = $DATA . $buf;
if (length($DATA) == 20) {
# wlog DEBUG, "Received " . unpackData($DATA) . "\n";
- eval { process(unpackData($DATA)); } || (wlog ERROR, "Failed to process data: $@\n" && die "Failed to process data: $@");
+ eval { process(unpackData($DATA)); } || (dieNicely("Failed to process data: $@"));
$DATA = "";
return;
}
@@ -961,7 +962,8 @@
}
if ($eset && @$eset) {
wlog(DEBUG, "Has error\n");
- die "Connection closed\n";
+ $CONNECTED = 0;
+ dieNicely("Connection closed\n");
}
if ($rset && @$rset) {
# can read
@@ -1024,10 +1026,10 @@
my ($state, $tag, $timeout, $flags, $reply) = @_;
if ($timeout) {
- die "Failed to register (timeout)\n";
+ dieNicely("Failed to register (timeout)");
}
elsif ($flags & ERROR_FLAG) {
- die "Failed to register (service returned error: ".join("\n", $reply).")";
+ dieNicely("Failed to register (service returned error: ".join("\n", $reply).")");
}
else {
$ID = $reply;
@@ -1046,13 +1048,11 @@
if ($timeout) {
if (time() - $LAST_HEARTBEAT > 2 * HEARTBEAT_INTERVAL) {
- wlog WARN, "No heartbeats received in a while. Dying.\n";
- die "Lost heartbeat\n";
+ dieNicely("Lost heartbeat");
}
}
elsif ($flags & ERROR_FLAG) {
- wlog WARN, "Heartbeat failed: $reply\n";
- die "Heartbeat failed: $reply\n";
+ dieNicely("Heartbeat failed: $reply");
}
else {
wlog INFO, "Heartbeat acknowledged\n";
@@ -1066,6 +1066,18 @@
encodeInt($statusCode), encodeInt($errorCode), $msg));
}
+# Logs $msg locally at ERROR level; if the worker is still connected, also
+# queues an RLOG command (level WARN) carrying $msg and calls sendQueued()
+# so the failure is reported remotely. Always ends by calling die with $msg.
+sub dieNicely {
+ my ($msg) = @_;
+
+ wlog ERROR, "$msg\n";
+ if ($CONNECTED) {
+ $CONNECTED = 0; # clear first to avoid recursive calls to this method
+ queueCmd((nullCB(), "RLOG", "WARN", $msg));
+ sendQueued();
+ }
+ die $msg;
+}
+
sub register {
my ($tag, $timeout, $reply) = @_;
queueReply($tag, ("OK"));
@@ -1158,13 +1170,12 @@
my ($jobid, $file) = @_;
my $dir = dirname($file);
if (-f $dir) {
- die "$jobid Cannot create directory $dir. A file with this name already exists";
+ dieNicely("$jobid Cannot create directory $dir. A file with this name already exists");
}
if (!-d $dir) {
wlog DEBUG, "Creating directory $dir\n";
if (!mkpath($dir)) {
- wlog WARN, "Cannot create directory $dir. $!\n";
- die "Cannot create directory $dir. $!";
+ dieNicely("Cannot create directory $dir. $!");
}
}
}
@@ -1186,7 +1197,7 @@
# and concurrent operations will fail.
my $handle;
if (!open($handle, ">", "$dst")) {
- die "Failed to open $dst: $!";
+ dieNicely("Failed to open $dst: $!");
}
else {
wlog DEBUG, "$jobid Opened $dst\n";
@@ -1405,8 +1416,7 @@
if (! -d $pinned_dir) {
wlog DEBUG, "mkpath: $pinned_dir\n";
mkpath($pinned_dir) ||
- die "mkPinnedDirectory(): " .
- "Could not mkdir: $pinned_dir ($!)\n";
+ dieNicely("mkPinnedDirectory(): Could not mkdir: $pinned_dir ($!)");
}
$PINNED_READY = 1;
}
@@ -1419,7 +1429,7 @@
wlog DEBUG, "link: $dst -> $pinned_dir$rdst\n";
if (! -f "$pinned_dir$rdst") {
link($dst, "$pinned_dir$rdst") ||
- die "getPinnedFile(): Could not link: $pinned_dir$rdst ($!)\n";
+ dieNicely("getPinnedFile(): Could not link: $pinned_dir$rdst ($!)");
}
}
@@ -1430,10 +1440,10 @@
if (! -d $dir) {
wlog DEBUG, "mkpath: $dir\n";
mkpath($dir) ||
- die "getPinnedFile(): Could not mkdir: $dir ($!)\n";
+ dieNicely("getPinnedFile(): Could not mkdir: $dir ($!)");
}
link("$pinned_dir$rdst", $dst) ||
- die "getPinnedFile(): Could not link: $!\n";
+ dieNicely("getPinnedFile(): Could not link: $!");
if ($PINNED{$rdst} == INFLIGHT) {
waitForPinnedFile($rdst, $jobid);
}
@@ -1776,7 +1786,7 @@
sub checkJob() {
my ($tag, $JOBID, $JOB) = @_;
-
+
wlog INFO, "$JOBID Job info received (tag=$tag)\n";
my $executable = $$JOB{"executable"};
if (!(defined $JOBID)) {
@@ -1999,12 +2009,12 @@
if (defined $stdout) {
wlog DEBUG, "STDOUT: $stdout\n";
close STDOUT;
- open STDOUT, ">$stdout" or die "Cannot redirect STDOUT";
+ open STDOUT, ">$stdout" or dieNicely("Cannot redirect STDOUT");
}
if (defined $stderr) {
wlog DEBUG, "STDERR: $stderr\n";
close STDERR;
- open STDERR, ">$stderr" or die "Cannot redirect STDERR";
+ open STDERR, ">$stderr" or dieNicely("Cannot redirect STDERR");
}
close STDIN;
More information about the Swift-commit
mailing list