use Data::Dumper;
use IO::Handle;
use Time::Local;
+use Time::localtime;
# the following is for the status server
# to communicate with Nagios plugin
use IO::Socket;
use IO::Select;
-my $status : shared = "OK";
+my $status : shared = "OK"; # status text written by main() and sent to clients by statusServer()
+my $time_ls : shared = &getTime(); # timestamp statusServer() compares with the current time to detect a stale status
+my $sleep_time : shared = 120; # seconds main() sleeps between checks; statusServer() tolerates 3x this age
our $server_port = '50502';
our $protocol = 'tcp';
-threads->new( \&statusServer);
+threads->new( \&statusServer ); # statusServer runs in its own thread; main() runs in this one
&main();
exit(0);
sub main {
- my $counter=0;
- my @total;
- my $statsize = "0";
+ #--- Monitoring loop (never returns). On every pass it lists the Lustre
+ #    beam directory and publishes the result in the shared $status, then
+ #    sleeps $sleep_time seconds. The used space is refreshed through
+ #    getTotalSize() when the last du run is more than 24 hours older
+ #    than the last ls check.
+ #
+ #    $time_ls is deliberately NOT declared with "my" here: a local copy
+ #    would shadow the shared variable that statusServer() reads, so the
+ #    server would never see a fresh check time and would end up
+ #    answering "time out!" forever.
+ $time_ls = &getTime();
+ my $time_du = 0; # time of the last du run; 0 forces a run on the first pass
+ my $lustre_size = 0; # used space in TB as returned by getTotalSize()
+
while (1)
{
- if ($counter == 0)
- {
- # every 24 hours
- my @size = `du -cms /lustre_alpha/hades`;
-
- foreach my $li (@size)
- {
- if($li =~/total/)
- {
- @total = split(" ", $li);
- $statsize = $total[0];
- $statsize =sprintf ("%.2f",$statsize/1024/1024);
- }
- }
- $counter = 720;
-
- print "/lustre_alpha/hades total size: $statsize TB\n";
- } # should be 720
+
+ if( abs($time_du - $time_ls) > 60*60*24 ){
+ $lustre_size = &getTotalSize();
+ $time_du = &getTime();
+ }
my $line =`ls -d /lustre_alpha/hades/beam/sep08`;
chomp($line);
+ $time_ls = &getTime(); # shared: tells statusServer() when the last check finished
+
if ($line eq "/lustre_alpha/hades/beam/sep08")
{
- if ($statsize > 30) {
- $status = "CRITICAL - Lustre disk space used: $statsize TB";
+ if ($lustre_size > 30) {
+ $status = "CRITICAL - Lustre hades/beam space used: $lustre_size TB";
}
- elsif ($statsize > 25) {
- $status = "WARNING - Lustre disk space used: $statsize TB";
+ elsif ($lustre_size > 25) {
+ $status = "WARNING - Lustre hades/beam space used: $lustre_size TB";
}
else {
- $status = "OK - Lustre disk space used: $statsize TB";
+ $status = "OK - Lustre hades/beam space used: $lustre_size TB";
}
}
else {
$status = "CRITICAL - failure of Lustre file system!";
}
- $counter --;
-
- #print "$status \n";
- sleep 120; #should be 120
+ sleep $sleep_time; # 2 minutes
}
}
-sub statusServer{ my $server_socket;
+sub statusServer{
+
+ my $server_socket;
my $client_socket;
my $selector;
if($file_handle == $server_socket) {
# create a new socket for this transaction
- unless (defined( $client_socket = $server_socket->accept() )
-)
+ unless (defined( $client_socket = $server_socket->accept() ))
{
print "ERROR: Cannot open socket to send status!\n";
}
-
- print $client_socket $status;
+ #--- report the status
+ my $current_time = &getTime();
+ if( abs($current_time - $time_ls) < 3*$sleep_time ){
+ print $client_socket $status;
+ }
+ else{
+ my $time_diff = int((abs($current_time - $time_ls))/60);
+ print $client_socket "time out! Last check done: $time_diff min ago";
+ }
close( $client_socket );
}
}
}
}
+}
+
+sub getTime
+{
+ #--- Return the current time in seconds since the epoch.
+ #    Callers only use differences between two getTime() values (or a
+ #    comparison against 0). The builtin time() is used instead of
+ #    feeding local wall-clock fields to timegm(): those fields jump by
+ #    an hour at a daylight-saving switch, which made a fresh check look
+ #    an hour old and triggered a false "time out!" report.
+ return time();
}
+sub getTotalSize
+{
+ #--- Used space of the lustre beam dir in TB (two decimals).
+ #    du reports megabytes (-m); only the grand-total line (-c) is used.
+ #    Returns 0 when du prints no line containing "total".
+
+ my $size_tb = 0;
+
+ foreach my $entry (`du -cms /lustre_alpha/hades/beam`){
+ next unless $entry =~ /total/;
+ my ($megabytes) = split(" ", $entry);
+ $size_tb = sprintf ("%.2f",$megabytes/1024/1024);
+ }
+ return $size_tb;
+}