From 640199290530dd0e272a3c250b83db9175450ea7 Mon Sep 17 00:00:00 2001 From: hadaq Date: Wed, 8 Feb 2012 13:38:13 +0000 Subject: [PATCH] JAM: added checks for EB epics services changed idomod output to file, suppress log warnings (ido2db not running) first try to get notification contact --- icinga/idomod.cfg | 8 +- icinga/modules/idoutils.cfg | 2 + icinga/objects/commands.cfg | 24 ++- icinga/objects/contacts.cfg | 8 +- icinga/objects/hosts_eb_servers.cfg | 234 +++++++++++++++++++++++++++- 5 files changed, 264 insertions(+), 12 deletions(-) diff --git a/icinga/idomod.cfg b/icinga/idomod.cfg index 8092d6a..db53633 100644 --- a/icinga/idomod.cfg +++ b/icinga/idomod.cfg @@ -19,9 +19,9 @@ instance_name=default # tcpsocket = TCP socket # unixsocket = UNIX domain socket (default) -#output_type=file +output_type=file #output_type=tcpsocket -output_type=unixsocket +#output_type=unixsocket @@ -33,9 +33,9 @@ output_type=unixsocket # of fully qualified domain name of the host that the module should # connect to for sending output. 
-#output=/var/lib/icinga/ido.dat +output=/var/lib/icinga/ido.dat #output=127.0.0.1 -output=/var/spool/icinga/ido2db.sock +#output=/var/spool/icinga/ido2db.sock diff --git a/icinga/modules/idoutils.cfg b/icinga/modules/idoutils.cfg index 23878bb..47f984a 100644 --- a/icinga/modules/idoutils.cfg +++ b/icinga/modules/idoutils.cfg @@ -4,6 +4,8 @@ # to place a file like this into modules/ by default, and include that # directory in your icinga.cfg + +# uncomment this until we really configured db for it JAM define module{ module_name idomod module_type neb diff --git a/icinga/objects/commands.cfg b/icinga/objects/commands.cfg index c96a455..21d2f81 100644 --- a/icinga/objects/commands.cfg +++ b/icinga/objects/commands.cfg @@ -260,15 +260,25 @@ define command { } #check_proc_by_ssh +# define command { +# command_name check_proc_by_ssh +# command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/usr/lib/nagios/plugins/my_check_process.pl $ARG2$" +# } define command { command_name check_proc_by_ssh - command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/usr/lib/nagios/plugins/my_check_process.pl $ARG2$" + command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/home/hadaq/nagios/plugins/my_check_process.pl $ARG2$" } + #check_multi_proc_by_ssh JAM +# define command { +# command_name check_multi_proc_by_ssh +# command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/usr/lib/nagios/plugins/my_check_process_multi.pl $ARG2$ $ARG3$ $ARG4$" +# } + define command { command_name check_multi_proc_by_ssh - command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/usr/lib/nagios/plugins/my_check_process_multi.pl $ARG2$ $ARG3$ $ARG4$" + command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/home/hadaq/nagios/plugins/my_check_process_multi.pl $ARG2$ $ARG3$ $ARG4$" } @@ -289,3 +299,13 @@ define command { command_name check_adaptec command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "sudo 
/usr/lib/nagios/plugins/adaptec-check" } + + +#check_by_ssh check EB iocs +# JAM note: need to specify ca access list as defined in ioc st.cmd to prevent caget error due to double network interface on EB servers +define command { + command_name check_epics_pv + command_line $USER1$/check_by_ssh -l $ARG1$ -H $HOSTADDRESS$ -C "/home/hadaq/nagios/plugins/my_epics.sh -pv $ARG2$ -H 192.168.103.255 -expval $ARG3$" +} + + diff --git a/icinga/objects/contacts.cfg b/icinga/objects/contacts.cfg index 4fda02f..82d9e0d 100644 --- a/icinga/objects/contacts.cfg +++ b/icinga/objects/contacts.cfg @@ -33,6 +33,12 @@ define contact{ email icinga@localhost ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ****** } +define contact{ + contact_name joern ; Short name of user + use generic-contact ; Inherit default values from generic-contact template (defined above) + alias Joern Adamczewski-Musch ; Full name of user + email j.adamczewski@gsi.de ; + } ############################################################################### @@ -49,5 +55,5 @@ define contact{ define contactgroup{ contactgroup_name admins alias Icinga Administrators - members icingaadmin + members icingaadmin,joern } diff --git a/icinga/objects/hosts_eb_servers.cfg b/icinga/objects/hosts_eb_servers.cfg index d26d9ec..8207b06 100644 --- a/icinga/objects/hosts_eb_servers.cfg +++ b/icinga/objects/hosts_eb_servers.cfg @@ -39,7 +39,12 @@ define host{ address 192.168.100.15 } - +define host{ + use eb-server ; Name of host template to use + host_name lxhadeb06 + alias lxhadeb06 + address 192.168.100.16 +} define host{ use eb-server ; Name of host template to use @@ -56,7 +61,13 @@ define host{ define hostgroup{ hostgroup_name eb-servers alias EB Servers - members lxhadeb01,lxhadeb02,lxhadeb03,lxhadeb04, lxhadeb05 + members lxhadeb01,lxhadeb02,lxhadeb03,lxhadeb04,lxhadeb05,lxhadeb06 +} + +define hostgroup{ + hostgroup_name eb-servers-active + alias active EB Servers + members lxhadeb02,lxhadeb03,lxhadeb04,lxhadeb05 } ## put 
lxhadesdaq into hostgroup for better display in web interface JAM @@ -203,7 +214,7 @@ define service{ define service{ use remote-service - hostgroup_name eb-servers + hostgroup_name eb-servers-active service_description daq_disks normal_check_interval 10 retry_check_interval 1 @@ -214,7 +225,7 @@ define service{ define service{ use remote-service - hostgroup_name eb-servers + hostgroup_name eb-servers-active service_description disks cleanup normal_check_interval 10 retry_check_interval 1 @@ -236,6 +247,213 @@ define service{ } +####### here check iocs for eventbuilders: + +define service{ + use remote-service + hostgroup_name eb-servers-active + service_description EB-EPICS procs + normal_check_interval 10 + retry_check_interval 1 + notification_interval 30 + notification_options c,w,r + check_command check_multi_proc_by_ssh!hadaq!SCREEN!4!WARNING! + } + + + +## direct check of running iocs with fine granularity: + + + +####### EB Server 1: +define service{ + use remote-service + host_name lxhadeb05 + service_description EB01-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb01:status!1 + } + +define service{ + use remote-service + host_name lxhadeb05 + service_description EB05-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb05:status!1 + } + +define service{ + use remote-service + host_name lxhadeb05 + service_description EB09-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb09:status!1 + } + +define service{ + use remote-service + host_name lxhadeb05 + service_description EB13-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command 
check_epics_pv!hadaq!HAD:eb13:status!1 + } + + +####### EB Server 2: +define service{ + use remote-service + host_name lxhadeb02 + service_description EB02-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb02:status!1 + } + +define service{ + use remote-service + host_name lxhadeb02 + service_description EB06-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb06:status!1 + } + +define service{ + use remote-service + host_name lxhadeb02 + service_description EB10-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb10:status!1 + } + +define service{ + use remote-service + host_name lxhadeb02 + service_description EB14-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb14:status!1 + } + + +####### EB Server 3: +define service{ + use remote-service + host_name lxhadeb03 + service_description EB03-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb03:status!1 + } + +define service{ + use remote-service + host_name lxhadeb03 + service_description EB07-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb07:status!1 + } + +define service{ + use remote-service + host_name lxhadeb03 + service_description EB11-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb11:status!1 + } + +define service{ + use remote-service + host_name lxhadeb03 + 
service_description EB15-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb15:status!1 + } + + +####### EB Server 4: +define service{ + use remote-service + host_name lxhadeb04 + service_description EB04-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb04:status!1 + } + +define service{ + use remote-service + host_name lxhadeb04 + service_description EB08-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb08:status!1 + } + +define service{ + use remote-service + host_name lxhadeb04 + service_description EB12-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb12:status!1 + } + +define service{ + use remote-service + host_name lxhadeb04 + service_description EB16-status + normal_check_interval 10 + retry_check_interval 2 + notification_interval 30 + notification_options c,u,r + check_command check_epics_pv!hadaq!HAD:eb16:status!1 + } + + + + + + #--- HARD DISK TEST service group define servicegroup{ @@ -252,10 +470,16 @@ define servicegroup{ define servicegroup{ servicegroup_name EB-disks - alias Eventbuilder data disk balancing/cleanup + alias Eventbuilder disks balancing and cleanup members *,daq_disks,*,disks cleanup; } +define servicegroup{ + servicegroup_name EB-epics + alias Eventbuilder EPICS + members *,EB-EPICS procs,*,EB01-status,*,EB02-status,*,EB03-status,*,EB04-status,*,EB05-status,*,EB06-status,*,EB07-status,*,EB08-status,*,EB09-status,*,EB10-status,*,EB11-status,*,EB12-status,*,EB13-status,*,EB14-status,*,EB15-status,*,EB16-status; + } + ######### here all data disks: -- 2.43.0