From: hadaq Date: Wed, 15 Oct 2008 13:41:28 +0000 (+0000) Subject: improved. Sergey. X-Git-Url: https://jspc29.x-matter.uni-frankfurt.de/git/?a=commitdiff_plain;h=403168ed83f7f8ae17d37e32ad74779bc648826b;p=hadesicinga.git improved. Sergey. --- diff --git a/plugins/check_lustre.pl b/plugins/check_lustre.pl index ed868a9..cc36e5a 100755 --- a/plugins/check_lustre.pl +++ b/plugins/check_lustre.pl @@ -8,6 +8,7 @@ use Net::FTP; use Data::Dumper; use IO::Handle; use Time::Local; +use Time::localtime; # the following is for the status server # to communicate with Nagios plugin @@ -16,69 +17,60 @@ use threads::shared; use IO::Socket; use IO::Select; -my $status : shared = "OK"; +my $status : shared = "OK"; +my $time_ls : shared = &getTime(); +my $sleep_time : shared = 120; our $server_port = '50502'; our $protocol = 'tcp'; -threads->new( \&statusServer); +threads->new( \&statusServer ); &main(); exit(0); sub main { - my $counter=0; - my @total; - my $statsize = "0"; + my $time_ls = &getTime(); + my $time_du = 0; + my $lustre_size = 0; + while (1) { - if ($counter == 0) - { - # every 24 hours - my @size = `du -cms /lustre_alpha/hades`; - - foreach my $li (@size) - { - if($li =~/total/) - { - @total = split(" ", $li); - $statsize = $total[0]; - $statsize =sprintf ("%.2f",$statsize/1024/1024); - } - } - $counter = 720; - - print "/lustre_alpha/hades total size: $statsize TB\n"; - } # should be 720 + + if( abs($time_du - $time_ls) > 60*60*24 ){ + $lustre_size = &getTotalSize(); + $time_du = &getTime(); + } my $line =`ls -d /lustre_alpha/hades/beam/sep08`; chomp($line); + $time_ls = &getTime(); + if ($line eq "/lustre_alpha/hades/beam/sep08") { - if ($statsize > 30) { - $status = "CRITICAL - Lustre disk space used: $statsize TB"; + if ($lustre_size > 30) { + $status = "CRITICAL - Lustre hades/beam space used: $lustre_size TB"; } - elsif ($statsize > 25) { - $status = "WARNING - Lustre disk space used: $statsize TB"; + elsif ($lustre_size > 25) { + $status = "WARNING - Lustre hades/beam space used: $lustre_size TB"; } else { - $status = "OK - Lustre disk space used: $statsize TB"; + $status = "OK - Lustre hades/beam space used: $lustre_size TB"; } } else { $status = "CRITICAL - failure of Lustre file system!"; } - $counter --; - - #print "$status \n"; - sleep 120; #should be 120 + sleep $sleep_time; # 2 minutes } } -sub statusServer{ my $server_socket; +sub statusServer{ + + my $server_socket; my $client_socket; my $selector; @@ -102,25 +94,61 @@ sub statusServer{ my $server_socket; if($file_handle == $server_socket) { # create a new socket for this transaction - unless (defined( $client_socket = $server_socket->accept() ) -) + unless (defined( $client_socket = $server_socket->accept() )) { print "ERROR: Cannot open socket to send status!\n"; } - - print $client_socket $status; + #--- report the status + my $current_time = &getTime(); + if( abs($current_time - $time_ls) < 3*$sleep_time ){ + print $client_socket $status; + } + else{ + my $time_diff = int((abs($current_time - $time_ls))/60); + print $client_socket "time out! Last check done: $time_diff min ago"; + } close( $client_socket ); } } } } +} + +sub getTime +{ + #--- get local time in seconds + my $tm = localtime; + my $sec = timegm($tm->sec, # sec 0-60 + $tm->min, # min 0-59 + $tm->hour, # hours 0-23 + $tm->mday, # mday 0-31 + $tm->mon, # month 0-11 + $tm->year); # year 1-138 (since 1900) + + return $sec; } +sub getTotalSize +{ + #--- get total size of lustre beam dir + + my $totsize = 0; + my @size = `du -cms /lustre_alpha/hades/beam`; + + foreach my $line (@size){ + if( $line =~ /total/ ){ + my @total = split(" ", $line); + $totsize = $total[0]; + $totsize = sprintf ("%.2f",$totsize/1024/1024); + } + } + return $totsize; +}