command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/home/hadaq/nagios/plugins/my_epics.sh -pv $ARG2$ -H 192.168.103.255 -expval $ARG3$"
}
+# restart any process by name if state is critical after 3 attempts
+# Options handed to my_restart_handler.pl:
+#   -r                       flag only used by this generic command (meaning not visible here)
+#   -m $ARG1$@$HOSTADDRESS$  login taken from the first argument, host from the checked host
+#   -s $ARG2$ / -a $ARG3$    presumably the start script and its argument string, by analogy
+#                            with restart_eb_epics -- confirm against my_restart_handler.pl
+#   -x / -y / -z             service state, state type and current check attempt
define command {
command_name restart_process
command_line $USER1$/my_restart_handler.pl -r -m $ARG1$@$HOSTADDRESS$ -s $ARG2$ -a $ARG3$ -x $SERVICESTATE$ -y $SERVICESTATETYPE$ -z $SERVICEATTEMPT$
}
+# restart eventbuilder epics if state is critical after 3 attempts
+# Takes no arguments: target (-m hadaq@lxhadesdaq), script path (-s start_eb_gbe.pl)
+# and argument string (-a "-i start -n 1-16") are fixed here.
+# -x / -y / -z forward service state, state type and current check attempt.
+# NOTE(review): unlike restart_process, no -r flag is passed -- confirm this is intentional.
+define command {
+ command_name restart_eb_epics
+ command_line $USER1$/my_restart_handler.pl -m hadaq@lxhadesdaq -s /home/hadaq/trbsoft/daq/evtbuild/start_eb_gbe.pl -a "-i start -n 1-16" -x $SERVICESTATE$ -y $SERVICESTATETYPE$ -z $SERVICEATTEMPT$
+}
+
+# restart oracle export processes if state is critical after 3 attempts
+# Takes no arguments: target (-m hadaq@lxhadesdaq) and script path
+# (-s runinfo2orastart_parallel.sh) are fixed here; -a is an empty string.
+# -x / -y / -z forward service state, state type and current check attempt.
+# NOTE(review): unlike restart_process, no -r flag is passed -- confirm this is intentional.
+define command {
+ command_name restart_run2oracle
+ command_line $USER1$/my_restart_handler.pl -m hadaq@lxhadesdaq -s /home/hadaq/trbsoft/daq/oracle/runinfo2orastart_parallel.sh -a "" -x $SERVICESTATE$ -y $SERVICESTATETYPE$ -z $SERVICEATTEMPT$
+}
+
+
+
+
# }
# runinfo2ora with multiple processes:
+# if not all processes are there, we restart everything.
+# check_command arguments: hadaq, runinfo2ora.pl, 16, CRITICAL (last argument empty).
+# Presumably login, process name, expected process count and the state reported
+# on a mismatch -- confirm against the check_multi_proc_by_ssh definition.
+# event_handler: restart_run2oracle is attached without arguments; notifications
+# are limited to critical and recovery (c,r).
define service{
use remote-service
retry_check_interval 1
notification_interval 30
notification_options c,r
- check_command check_multi_proc_by_ssh!hadaq!runinfo2ora.pl!16!WARNING!
+ check_command check_multi_proc_by_ssh!hadaq!runinfo2ora.pl!16!CRITICAL!
+ event_handler restart_run2oracle
}
####### here check iocs for eventbuilders:
-
+# there must be exactly 4 processes on any active EB server
+# if not, we will restart _all_ iocs on all machines
+# Applied to every host in hostgroup eb-servers-active.
+# check_command arguments: hadaq, SCREEN, 4, CRITICAL (last argument empty).
+# Presumably login, process name, expected process count and the state reported
+# on a mismatch -- confirm against the check_multi_proc_by_ssh definition.
+# event_handler: restart_eb_epics is attached without arguments; that command
+# always targets hadaq@lxhadesdaq, whichever hostgroup member triggered it.
define service{
use remote-service
hostgroup_name eb-servers-active
retry_check_interval 1
notification_interval 30
notification_options c,w,r
- check_command check_multi_proc_by_ssh!hadaq!SCREEN!4!WARNING!
+ check_command check_multi_proc_by_ssh!hadaq!SCREEN!4!CRITICAL!
+ event_handler restart_eb_epics
}
## direct check of running iocs with fine granularity:
+# note: these checks report CRITICAL if the ioc is running but the eb process is not;
+# they report UNKNOWN if the ioc itself is not available