jspc29.x-matter.uni-frankfurt.de Git - hadesicinga.git/commitdiff
JAM: Added eventhandler for runinfo2oracle and epics ioc restart
author hadaq <hadaq>
Mon, 13 Feb 2012 12:36:43 +0000 (12:36 +0000)
committer hadaq <hadaq>
Mon, 13 Feb 2012 12:36:43 +0000 (12:36 +0000)
icinga/objects/commands.cfg
icinga/objects/hosts_eb_servers.cfg

index 21b54b4fa2f1c2c171646aef9d99912109a67eb9..0f8e9a2dbaa4ee0e5b93979361c829b87036ec89 100644 (file)
@@ -308,9 +308,25 @@ define command {
    command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/home/hadaq/nagios/plugins/my_epics.sh -pv $ARG2$  -H 192.168.103.255 -expval $ARG3$"
 }
 
+# restart any process by name if state is critical after 3 attempts
 define command {
    command_name restart_process
    command_line $USER1$/my_restart_handler.pl -r -m $ARG1$@$HOSTADDRESS$ -s $ARG2$ -a $ARG3$ -x $SERVICESTATE$ -y $SERVICESTATETYPE$ -z $SERVICEATTEMPT$    
 }
 
+# restart eventbuilder EPICS IOCs if state is critical after 3 attempts
+define command {
+   command_name restart_eb_epics
+   command_line $USER1$/my_restart_handler.pl -m hadaq@lxhadesdaq -s /home/hadaq/trbsoft/daq/evtbuild/start_eb_gbe.pl -a "-i start -n 1-16" -x $SERVICESTATE$ -y $SERVICESTATETYPE$ -z $SERVICEATTEMPT$    
+}
+
+# restart oracle export processes if state is critical after 3 attempts
+define command {
+   command_name restart_run2oracle
+   command_line $USER1$/my_restart_handler.pl -m hadaq@lxhadesdaq -s /home/hadaq/trbsoft/daq/oracle/runinfo2orastart_parallel.sh -a "" -x $SERVICESTATE$ -y $SERVICESTATETYPE$ -z $SERVICEATTEMPT$    
+}
+
+
+
+
 
index 2f875fd723fffd6329168f4a2099c81e21b08f80..5862f9bccfbc3b011013420455b7c324cf103e5c 100644 (file)
@@ -136,6 +136,7 @@ define service{
 #        }
 
 # runinfo2ora with multiple processes:
+# if not all 16 processes are running, the event handler restarts everything.
 
 define service{
        use                             remote-service
@@ -145,7 +146,8 @@ define service{
        retry_check_interval            1
        notification_interval           30
        notification_options            c,r
-       check_command                   check_multi_proc_by_ssh!hadaq!runinfo2ora.pl!16!WARNING!
+       check_command                   check_multi_proc_by_ssh!hadaq!runinfo2ora.pl!16!CRITICAL!
+       event_handler                   restart_run2oracle
        }
 
 
@@ -265,7 +267,8 @@ define service{
 
 
 ####### here check iocs for eventbuilders:
-
+# there must be exactly 4 SCREEN processes on every active EB server;
+# if not, the event handler restarts _all_ IOCs on all machines
 define service{
        use                             remote-service
        hostgroup_name                  eb-servers-active
@@ -274,12 +277,15 @@ define service{
        retry_check_interval            1
        notification_interval           30
        notification_options            c,w,r
-       check_command                   check_multi_proc_by_ssh!hadaq!SCREEN!4!WARNING!
+       check_command                   check_multi_proc_by_ssh!hadaq!SCREEN!4!CRITICAL!
+       event_handler                   restart_eb_epics
        }
 
 
 
 ## direct check of running iocs with fine granularity:
+# note: these checks report CRITICAL if the IOC is running but the EB process is not;
+# they report UNKNOWN if the IOC is not available