From e204278f1f3e964e3cbba27407e35a3ebe213e6c Mon Sep 17 00:00:00 2001 From: hadaq Date: Tue, 27 Jan 2009 14:53:47 +0000 Subject: [PATCH] Can check Lustre mount and Lustre space used (-s flag). Sergey --- plugins/check_lustre.pl | 76 ++++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/plugins/check_lustre.pl b/plugins/check_lustre.pl index cc36e5a..3d8ba4f 100755 --- a/plugins/check_lustre.pl +++ b/plugins/check_lustre.pl @@ -1,5 +1,11 @@ #!/usr/bin/perl -w +######################################################## +# plugin script for Nagios to monitor Lustre # +# # +# Sergey Yurevich # +######################################################## + use strict; use warnings; @@ -9,6 +15,7 @@ use Data::Dumper; use IO::Handle; use Time::Local; use Time::localtime; +use Getopt::Std; # the following is for the status server # to communicate with Nagios plugin @@ -17,9 +24,17 @@ use threads::shared; use IO::Socket; use IO::Select; +our ($opt_s, $opt_h); +getopts('hs'); + +if($opt_h){ + &showHelp(); + exit(0); +} + my $status : shared = "OK"; my $time_ls : shared = &getTime(); -my $sleep_time : shared = 120; +my $sleep_time : shared = 600; # 10 minutes our $server_port = '50502'; our $protocol = 'tcp'; @@ -38,33 +53,31 @@ sub main { while (1) { - if( abs($time_du - $time_ls) > 60*60*24 ){ + if( $opt_s && abs($time_du - $time_ls) > 60*60*24 ){ $lustre_size = &getTotalSize(); $time_du = &getTime(); } - my $line =`ls -d /lustre_alpha/hades/beam/sep08`; + my $line =`ls -d /lustre_alpha/hades/user`; chomp($line); $time_ls = &getTime(); - if ($line eq "/lustre_alpha/hades/beam/sep08") + if ($line eq "/lustre_alpha/hades/user") { - if ($lustre_size > 30) { - $status = "CRITICAL - Lustre hades/beam space used: $lustre_size TB"; - } - elsif ($lustre_size > 25) { - $status = "WARNING - Lustre hades/beam space used: $lustre_size TB"; + if($opt_s){ + $status = "OK - Lustre space used: $lustre_size TB"; + 
&checkLustreSize($lustre_size); } - else { - $status = "OK - Lustre hades/beam space used: $lustre_size TB"; + else{ + $status = "OK - Lustre is mounted"; } } else { - $status = "CRITICAL - failure of Lustre file system!"; + $status = "CRITICAL - Lustre mount failure!"; } - sleep $sleep_time; # 2 minutes + sleep $sleep_time; # 10 minutes } } @@ -78,7 +91,7 @@ sub statusServer{ IO::Socket::INET->new( LocalPort => $server_port, Proto => 'tcp', Listen => SOMAXCONN ) )) - { + { print "ERROR: Cannot start status server!\n"; } @@ -86,14 +99,14 @@ sub statusServer{ while(1) { - # wait 5 seconds for connections + #--- wait 5 seconds for connections while (my @file_handles = $selector->can_read( 5 )) { foreach my $file_handle (@file_handles) { if($file_handle == $server_socket) { - # create a new socket for this transaction + #--- create a new socket for this transaction unless (defined( $client_socket = $server_socket->accept() )) { print "ERROR: Cannot open socket to send status!\n"; @@ -137,7 +150,7 @@ sub getTotalSize #--- get total size of lustre beam dir my $totsize = 0; - my @size = `du -cms /lustre_alpha/hades/beam`; + my @size = `du -cms /lustre_alpha/hades`; foreach my $line (@size){ if( $line =~ /total/ ){ @@ -150,6 +163,35 @@ sub getTotalSize return $totsize; } +sub checkLustreSize +{ + my $lustre_size = shift; + + if ($lustre_size > 30) { + $status = "CRITICAL - Lustre space used: $lustre_size TB"; + } + elsif ($lustre_size > 25) { + $status = "WARNING - Lustre space used: $lustre_size TB"; + } +} + +sub showHelp +{ + print << 'EOF'; + + Nagios plugin + + This script checks periodically a Lustre mount on + a local machine. The script also runs a status server + in a separate thread for reporting the status to Nagios. + + Usage: check_lustre.pl [-s] [-h] + + -s : Check also a disk space used on Lustre file system. + -h : Print this help. + +EOF +} -- 2.43.0