--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+
+######################################################################
+# #
+# Nagios plugin to check hard disks and raid arrays. #
+# To be able to run smartctl under normal user account #
+# you should open permissions under root account: #
+# chmod u+s /usr/sbin/smartctl #
+# #
+# Long self-tests should be executed in crontab: #
+# 0 23 * * 0 /usr/sbin/smartctl -t long /dev/sda > /dev/null #
+# #
+# Sergey Yurevich #
+# #
+######################################################################
+
+use threads;
+use threads::shared;
+use IO::Socket;
+use IO::Select;
+
+use Data::Dumper;
+use FileHandle;
+use Getopt::Long;
+
+#- Defaults; every one of them can be overridden on the command line.
+our $server_port = '50999';    # TCP port the status server listens on
+our $protocol = 'tcp';
+
+my $opt_help = 0;
+my $opt_verb = 0;
+my $opt_daemon = 0;
+my $opt_smart = 1;             # Check disks with smartctl tool
+my $opt_raid = 1;              # Check RAID Arrays
+my $opt_sleep = 3600;          # Seconds between two check cycles (1 hour)
+
+#- GetOptions returns false when it meets an unknown option or a value of
+#- the wrong type. Starting the daemon with silently ignored options would
+#- hide the mistake, so show the usage text and give up instead.
+GetOptions ('p|port=i' => \$server_port,
+            'd|daemon' => \$opt_daemon,
+            's|smart=i' => \$opt_smart,
+            'r|raid=i' => \$opt_raid,
+            'v|verb' => \$opt_verb,
+            'l|sleep=i' => \$opt_sleep,
+            'h|help' => \$opt_help)
+    or do {
+        #- Getopt::Long has already reported the offending option on STDERR
+        &help();
+        exit(1);
+    };
+
+if($opt_help){
+    &help();
+    exit(0);
+}
+
+#- Data sources used by the checks.
+#- (For testing, $MDSTAT can be pointed at a saved copy instead,
+#-  e.g. /home/icinga/nagios/plugins/mdstat.)
+my ($MDSTAT, $SMARTCTL) = ("/proc/mdstat", "/usr/sbin/smartctl");
+
+#- Status line handed out by the status server; shared between threads.
+my $status : shared = "OK";
+
+#- @disks       : the hard disks to check
+#- %status_hash : status info collected for each hard disk
+#- @md_list     : RAID arrays found to be problematic
+my (@disks, %status_hash, @md_list);
+
+#- References to the containers above, as used by the subs below.
+my ($disks_aref, $status_href) = (\@disks, \%status_hash);
+
+#- Daemonize: detach the standard streams from the terminal. No fork is
+#- done here; the process is expected to be put into the background by the
+#- caller (see the example in the help text).
+#- STDIN has to be reopened for reading, STDOUT and STDERR for writing.
+if($opt_daemon){
+    open(STDIN,  '<', '/dev/null') or die "Cannot redirect STDIN to /dev/null: $!\n";
+    open(STDOUT, '>', '/dev/null') or die "Cannot redirect STDOUT to /dev/null: $!\n";
+    open(STDERR, '>', '/dev/null') or die "Cannot redirect STDERR to /dev/null: $!\n";
+}
+
+#- The status server answers requests in its own thread ...
+threads->new(\&statusServer);
+
+#- ... while the main thread runs the periodic checks (never returns).
+&main();
+
+exit(0);
+
+####################### END OF MAIN ######################
+
+#- Print the usage text to STDOUT.
+#- The heredoc is deliberately not interpolated, so the mdstat path is
+#- spelled out in the text instead of referring to a variable.
+sub help()
+{
+    print "\n";
+    print <<'EOF';
+check_hdisks.pl
+
+ This script checks RAID status in /proc/mdstat. It also checks disks
+ using smartctl tool. Smart checks must be of course supported
+ on the given PC. Long smart tests should run as cron jobs:
+ 0 23 * * 0 /usr/sbin/smartctl -t long /dev/sda > /dev/null
+
+Usage:
+
+ Command line: check_hdisks.pl
+ [h|help] : Show this help.
+ [p|port <port>] : Port for the Status Server (default: 50999).
+ [d|daemon] : Run as a daemon.
+ [s|smart <1|0>] : Enable|disable smart checks (default: 1).
+ [r|raid <1|0>] : Enable|disable RAID checks (default: 1).
+ [v|verb] : More verbose.
+ [l|sleep <time>] : Sleep time in seconds between checks (default: 3600).
+
+Examples:
+
+ Run as a daemon and do only RAID checks:
+ check_hdisks.pl -d -s 0 &
+
+EOF
+}
+
+#- Main check loop: collect the disk list once, then run the enabled
+#- checks forever, pausing $opt_sleep seconds between two cycles.
+#- Never returns.
+sub main()
+{
+    #- Get all hard disks (only done at start-up)
+    &getHardDisks($disks_aref);
+
+    while(1){
+
+        #- $status is deliberately NOT reset to "OK" here. The status server
+        #- thread may be asked at any moment, and smartctl runs can take a
+        #- while. Until getStatus() publishes the result of this cycle, the
+        #- result of the previous cycle stays visible.
+        &checkRaid() if($opt_raid);
+        &checkSmart() if($opt_smart);
+        &getStatus();
+        &init2zero();
+
+        sleep($opt_sleep); # default: 1 hour
+    }
+}
+
+#- Build the status line from the data collected during the current check
+#- cycle and store it in the shared $status variable.
+#-
+#- Reads : %$status_href (per-disk keys FAIL, WARN, SELF, TEMP), @md_list
+#- Writes: $status
+#-
+#- Severity: any FAIL entry or problematic RAID array gives CRITICAL,
+#- otherwise any WARN/SELF entry gives WARNING, otherwise OK followed by
+#- the temperatures. Each entry is rendered as " <text>," exactly as before.
+sub getStatus()
+{
+    my @crit_list = ();
+    my @warn_list = ();
+    my @ok_list = ();
+
+    #- Check info collected by smartctl.
+    #- Iterate over the KEYS only: walking the hash itself would also yield
+    #- the value hashrefs and use them as (bogus) disk names. Sorting keeps
+    #- the message stable from one cycle to the next.
+    foreach my $disk ( sort keys %$status_href ){
+        my $info = $status_href->{$disk};
+
+        if( defined $info->{'FAIL'} ){
+            push(@crit_list, $disk . " " . $info->{'FAIL'});
+        }
+
+        if( defined $info->{'WARN'} ){
+            push(@warn_list, $disk . " " . $info->{'WARN'});
+        }
+
+        if( defined $info->{'SELF'} ){
+            push(@warn_list, $disk . " " . $info->{'SELF'});
+        }
+
+        if( defined $info->{'TEMP'} ){
+            push(@ok_list, "temp: " . $disk . " " . $info->{'TEMP'});
+        }
+    }
+
+    #- Look at messages from RAID array check
+    if( @md_list ){
+        push(@crit_list, "Problem with " . join('', map { " " . $_ } @md_list));
+    }
+
+    #- Build status message from the most severe non-empty list
+    my $label = "OK";
+    my $entries = \@ok_list;
+    if( @crit_list ){
+        $label = "CRITICAL";
+        $entries = \@crit_list;
+    }
+    elsif( @warn_list ){
+        $label = "WARNING";
+        $entries = \@warn_list;
+    }
+
+    $status = $label . " - " . join('', map { " " . $_ . "," } @$entries);
+
+    print "status: $status\n" if($opt_verb);
+}
+
+#- Forget everything collected during the check cycle that just ended,
+#- so the next cycle starts with empty containers.
+sub init2zero()
+{
+    undef @md_list;         # RAID arrays reported as problematic
+    undef %status_hash;     # per-disk smartctl findings
+}
+
+#- Run the smartctl based checks on every disk in @disks:
+#- first the health/self-test check, then the temperature read-out.
+sub checkSmart()
+{
+    for my $idx ( 0 .. $#disks ){
+        my $device = $disks[$idx];
+
+        &checkSmartShort($device);
+        &checkSmartTemp($device);
+    }
+}
+
+#- Check the SMART health status and the self-test log of one disk.
+#-
+#- Argument: $disk - device path handed to smartctl (e.g. /dev/sda)
+#- Writes  : $status_href->{$disk}->{OK|WARN|FAIL|SELF}
+#-
+#- No prototype on purpose: the sub takes an argument, so the former empty
+#- prototype was wrong and only worked because callers use the &name(...)
+#- form, which ignores prototypes.
+sub checkSmartShort
+{
+    my ($disk) = @_;
+
+    my $health = `$SMARTCTL -H $disk`;
+    $health = "" unless( defined $health );     # smartctl could not be started
+
+    if( $health =~ /\n(.*?test result: PASSED\n)/i ||
+        $health =~ /\n(.*?Sense: Ok!\n)/i ){
+        $status_href->{$disk}->{'OK'} = $1;
+    }
+
+    #- A line mentioning "OLD" (as in "THRESHOLD"), but not the attribute
+    #- type "Old_age". A negative look-ahead is needed for that; a negated
+    #- character class only rejects single characters and may eat a newline.
+    if( $health =~ /\n(.*?OLD(?!_age)[^\n]*)/i ){
+        $status_href->{$disk}->{'WARN'} = $1;
+    }
+
+    #- A line mentioning "FAIL", but neither the column title "WHEN_FAILED"
+    #- nor any other "FAILED" (look-arounds, see above).
+    if( $health =~ /\n(.*?(?<!WHEN_)FAIL(?!ED)[^\n]*)/i ){
+        $status_href->{$disk}->{'FAIL'} = $1;
+    }
+
+    #- Check results of the long test
+    my $selftest = `$SMARTCTL -l selftest $disk`;
+
+    #- Nothing to judge when smartctl printed nothing at all
+    return unless( defined $selftest && $selftest =~ /\S/ );
+
+    #- The patterns must not be optional: a pattern made only of optional
+    #- parts matches every string, which used to store an undefined SELF
+    #- entry and made the "failed" branch unreachable.
+    if( $selftest =~ /(No\s+self-tests\s+have\s+been\s+logged)/ ){
+        $status_href->{$disk}->{'SELF'} = $1;
+    }
+    elsif( $selftest !~ /Completed\s+without\s+error/ ){
+        $status_href->{$disk}->{'SELF'} = "Self-test failed";
+    }
+
+    return;
+}
+
+#- Read the temperature of one disk from the smartctl attribute table and
+#- store it in $status_href->{$disk}->{'TEMP'}.
+#- The value is taken from the tenth blank-separated field of the line
+#- containing "194 Temperature_Celsius".
+sub checkSmartTemp()
+{
+    my ($disk) = @_;
+
+    #- Keep only the attribute line(s) of interest
+    my @temp_rows = grep { $_ =~ "194 Temperature_Celsius" } `$SMARTCTL -A $disk`;
+
+    for my $row ( @temp_rows ){
+        my $raw_value = ( split(/ +/, $row) )[9];
+        chomp($raw_value);
+        $status_href->{$disk}->{'TEMP'} = $raw_value;
+    }
+}
+
+#- Append every existing /dev/sdX and /dev/hdX node (X = a..z) to the
+#- array referenced by the first argument.
+#- Order: sda, hda, sdb, hdb, ...
+sub getHardDisks()
+{
+    my ($disks_aref) = @_;
+
+    for my $letter ( 'a'..'z' ){
+        for my $prefix ( "/dev/sd", "/dev/hd" ){
+            my $candidate = $prefix . $letter;
+            next unless( -e $candidate );
+            push( @$disks_aref, $candidate );
+        }
+    }
+}
+
+#- Scan the mdstat file for RAID arrays that are not fully up.
+#-
+#- Reads : $MDSTAT (path of the mdstat file)
+#- Writes: @md_list (names of the problematic arrays), and $status when at
+#-         least one array is problematic.
+#-
+#- An array is problematic when the two numbers in its "[n/m]" field differ
+#- or its "[UU_]" field contains an underscore.
+sub checkRaid()
+{
+    my $md;     # array whose status line is expected next, if any
+
+    my $fh;
+    unless( open($fh, '<', $MDSTAT) ){
+        #- Without the file there is nothing to check
+        print "Cannot open $MDSTAT: $!\n" if($opt_verb);
+        return;
+    }
+
+    #- Now check the mdstat file...
+    while( my $line = <$fh> ){
+        if( $line =~ /^(md\w+)\s*:\s*\w+/ ){
+            #- Found the raid array. The name may have any number of digits
+            #- (md0, md10, md127, ...).
+            $md = $1;
+        }
+        elsif( defined $md && $line =~ /\[(\d+)\/(\d+)\]\s+\[(\w+)\]/ ){
+            #- Check if all disks are UP for the current raid array
+            my ($disks_all, $disks_up, $disks_status) = ($1, $2, $3);
+
+            if( $disks_all != $disks_up || $disks_status =~ /_/ ){
+                push(@md_list, $md);
+            }
+
+            #- This array is done; undef (not "") so the guard above really
+            #- ignores status-like lines until the next array header.
+            undef $md;
+        }
+    }
+
+    close($fh);
+
+    if( @md_list ){
+        $status = "CRITICAL - Problem with " . join('', map { " " . $_ } @md_list);
+    }
+
+    return;
+}
+
+#- Status server, run in its own thread: listens on TCP port $server_port
+#- and answers every incoming connection with the current content of the
+#- shared $status variable, then closes the connection.
+#- Returns only when the listening socket cannot be created.
+sub statusServer
+{
+    #- ReuseAddr lets a restarted daemon bind again right away while old
+    #- connections are still lingering on the port.
+    my $server_socket = IO::Socket::INET->new( LocalPort => $server_port,
+                                               Proto     => 'tcp',
+                                               Listen    => SOMAXCONN,
+                                               ReuseAddr => 1 );
+
+    unless( defined $server_socket ){
+        #- Nothing to serve without a socket: end this thread instead of
+        #- looping forever on an undefined handle.
+        print "ERROR: Cannot start status server!\n";
+        return;
+    }
+
+    my $selector = IO::Select->new( $server_socket );
+
+    while(1) {
+
+        # wait 5 seconds for connections
+        while( my @file_handles = $selector->can_read( 5 ) ){
+
+            foreach my $file_handle (@file_handles) {
+
+                next unless( $file_handle == $server_socket );
+
+                # create a new socket for this transaction
+                my $client_socket = $server_socket->accept();
+
+                unless( defined $client_socket ){
+                    print "ERROR: Cannot open socket to send status!\n";
+                    next;   # printing to an undefined handle would kill the thread
+                }
+
+                print $client_socket $status;
+
+                close( $client_socket );
+            }
+        }
+    }
+}