From: hadaq Date: Fri, 10 Feb 2012 16:39:58 +0000 (+0000) Subject: JAM: introduced restart handler script for icinga X-Git-Url: https://jspc29.x-matter.uni-frankfurt.de/git/?a=commitdiff_plain;h=3273127f78fd7db57a2476195f7003b5f62ed0dc;p=hadesicinga.git JAM: introduced restart handler script for icinga --- diff --git a/plugins/README b/plugins/README index e694f10..0afe34c 100644 --- a/plugins/README +++ b/plugins/README @@ -1,5 +1,9 @@ Plugin Location Comment -------------------------------------------------------------------- +check_hdisks.pl hades27 Remote script. Checks hard disks + using smartctl. Checks RAID using + /proc/mdstat. + check_backup.pl hadeb07 Remote script. Checks backup on hadeb07. Runs status server to report status to Nagios. @@ -27,6 +31,11 @@ my_check_proc_status.pl hadesdaq Run by Nagios to receive status report from remote scripts. +my_check_proc_disk_status.pl hadesdaq Run by Nagios to receive + status report from remote + scripts. Checks provided temperatures + of the remote hard disks. + my_check_dhcp.pl hadesdaq Run by Nagios. Checks if dhcp daemon is running. 
diff --git a/plugins/check_hdisks.pl b/plugins/check_hdisks.pl index 71140a5..2465130 100755 --- a/plugins/check_hdisks.pl +++ b/plugins/check_hdisks.pl @@ -99,7 +99,7 @@ Usage: Examples: - Run as a daemon and do only RAID checks: + Run as a daemon and do only RAID checks (disable smart checks): check_hdisks.pl -d -s 0 & EOF @@ -107,8 +107,6 @@ EOF sub main() { - my $MDSTAT = "/proc/mdstat"; - #- Get all hard disks &getHardDisks($disks_aref);
diff --git a/plugins/my_restart_handler.pl b/plugins/my_restart_handler.pl
new file mode 100755
index 0000000..02a2c37
--- /dev/null
+++ b/plugins/my_restart_handler.pl
@@ -0,0 +1,216 @@
+#!/usr/bin/perl -w
+
+########################################################
+# restart script on a given list of PCs                #
+#                                                      #
+# Sergey Yurevich                                      #
+# JAM: extended as icinga restart handler 10-Feb-12    #
+########################################################
+
+use strict;
+use warnings;
+use Getopt::Std;
+use File::Basename;
+
+our ($opt_s, $opt_r, $opt_k, $opt_a, $opt_h, $opt_m, $opt_x, $opt_y, $opt_z);
+getopts('hs:a:krm:x:y:z:');
+
+if($opt_h){
+    showHelp();
+    exit(0);
+}
+
+checkArgs();
+
+#--- on these PCs the script should be restarted:
+my @PCList = ('lxg0429','lxg0433','lxg0435','lxg0437','lxg0439','lxg0440','lxg0441','lxg0442','lxg0443','lxg0444','lxg0445','lxg0446','lxg0447','lxg0448','lxg0452','lxg0453','lxg0454','lxg0455');
+#--- arguments of script to be restarted (defined() keeps a literal "-a 0")
+my $args = defined($opt_a) ? $opt_a : "";
+editPCList();
+
+main();
+
+exit(0);
+
+#------------------------ END -------------------------
+
+#--- Entry point: in icinga handler mode first decide whether the reported
+#    state calls for action, then kill/start/restart $opt_s on each PC.
+sub main
+{
+    if( !handlerWantsAction() ){
+        return;    #- top level exits with 0 after main()
+    }
+
+    foreach my $PC (@PCList) {
+
+        printMessage($PC);
+
+        if( $opt_k || $opt_r ){
+            #--- get PIDs of running scripts
+            my $out = remotePids($PC);
+
+            #--- is there anything to kill?
+            if($out && $opt_k){
+                #--- kill
+                runRemote($PC, "pidof -x $opt_s | xargs kill -9");
+            }
+            elsif($out && $opt_r){
+                #--- kill and restart
+                runRemote($PC, "pidof -x $opt_s | xargs kill -9; sleep 1; $opt_s $args &");
+            }
+            elsif($opt_r){
+                #--- restart
+                runRemote($PC, "$opt_s $args &");
+            }
+        }
+        else{
+            #--- start without killing
+            runRemote($PC, "$opt_s $args &");
+        }
+    }
+}
+
+#--- Evaluate the icinga handler options -x (state), -y (state type) and
+#    -z (attempt number). Returns 1 if the restart section should run,
+#    0 if nothing has to be done. Without -x the script always acts.
+sub handlerWantsAction
+{
+    if( !defined($opt_x) ){
+        return 1;
+    }
+    if($opt_x ne "CRITICAL"){
+        # no action if state is not critical
+        return 0;
+    }
+    if( !defined($opt_y) ){
+        return 1;
+    }
+
+    if($opt_y eq "SOFT"){
+        # for soft case, we check number of retries before reacting
+        if( defined($opt_z) ){
+            #- take the leading digits only, so a malformed -z counts as
+            #  attempt 0 instead of raising an "isn't numeric" warning
+            my $attempt = ($opt_z =~ /\A\s*(\d+)/) ? $1 : 0;
+            if($attempt < 3){
+                print "my_restart_handler found soft CRITICAL state with attempt $opt_z , wait another try...\n";
+                return 0;
+            }
+            print "my_restart_handler found soft CRITICAL state for $opt_z attempts, take action now!\n";
+        }
+    }
+    elsif($opt_y eq "HARD"){
+        # for hard case, we take immediate action
+        print "my_restart_handler found hard CRITICAL state, take action now!\n";
+    }
+
+    return 1;
+}
+
+#--- Return the output of "pidof -x $opt_s" run on $PC via ssh (a string of
+#    PIDs, empty if nothing is running or ssh could not be started).
+sub remotePids
+{
+    my $PC = shift;
+
+    #- list form: no local shell, the command string reaches the remote
+    #  shell exactly as written here
+    my $fh;
+    if( !open($fh, '-|', 'ssh', '-f', $PC, "pidof -x $opt_s") ){
+        print STDERR "my_restart_handler: cannot run ssh for $PC: $!\n";
+        return "";
+    }
+    my $out = join('', <$fh>);
+    close($fh);    #- a non-zero status here only means pidof found nothing
+    chomp($out);
+
+    return $out;
+}
+
+#--- Run $command in the remote shell of $PC via "ssh -f". Returns the
+#    status of system(); a failure is reported on STDERR.
+sub runRemote
+{
+    my ($PC, $command) = @_;
+
+    my $status = system('ssh', '-f', $PC, $command);
+    if($status != 0){
+        print STDERR "my_restart_handler: ssh to $PC failed (status $status)\n";
+    }
+
+    return $status;
+}
+
+#--- Abort with exit code 1 unless the mandatory -s option was given.
+sub checkArgs
+{
+    if( !defined($opt_s) ){
+        print "You must provide -s option with an argument!\n";
+        print "Read help: my_restart_handler.pl -h\n";
+        exit(1);
+    }
+}
+
+#--- With -m, replace the built-in PC list by the single given PC.
+sub editPCList
+{
+    if( defined($opt_m) ){
+        @PCList = ($opt_m);
+    }
+}
+
+#--- Print which action (kill, restart or start) is taken on $PC.
+sub printMessage
+{
+    my $PC = shift;
+
+    my $prog_name = basename($opt_s);
+
+    if($opt_k){
+        print "my_restart_handler: kill $prog_name on $PC ...\n";
+    }
+    elsif($opt_r){
+        print "my_restart_handler: restart $prog_name $args on $PC ...\n";
+    }
+    else{
+        print "my_restart_handler: start $prog_name $args on $PC ...\n";
+    }
+}
+
+#--- Print the usage text.
+sub showHelp
+{
+    print <<'EOF';
+
+  Restart Handler Script (SY, JAM 2012)
+
+  This script restarts a script given as an argument.
+  The script is restarted on a list of machines listed
+  in @PCList array. Password-less SSH is used to login
+  to remote PCs, thus, you must set up private/public keys!
+  The remote script will be restarted under your 'user name'.
+
+  For use as icinga handler, arguments x, y, z indicate icinga
+  $SERVICESTATE$ , $SERVICESTATETYPE$ , $SERVICEATTEMPT$
+  to decide when action has to be done
+
+  Usage: my_restart_handler.pl -s <script> [-a <args>] [-x <state>] [-y <type>] [-z <attempt>] [-r] [-k] [-m <pcname>] [-h]
+
+    -s <script>  : Name of script to be (re)started. Name must
+                   contain a path to a script on remote PC!
+    -a <args>    : Arguments for a script to be (re)started.
+    -x <state>   : icinga service state (OK,WARNING,UNKNOWN,CRITICAL)
+    -y <type>    : icinga state type (SOFT,HARD)
+    -z <attempt> : number of handler calls
+    -r           : Kill a running script before restarting.
+    -k           : Kill a running script without restarting.
+    -m <pcname>  : Restart script on "pcname" only.
+    -h           : Print this help.
+
+  If there is only -s specified this script will
+  remotely start program without trying to kill
+  it first.
+
+EOF
+}