--- /dev/null
+################################################################################
+# Sample command definitions for Nagios 2.6
+#
+# Read the documentation for more information on this configuration file. I've
+# provided some comments here, but things may not be so clear without further
+# explanation, so make sure to read the HTML documentation!
+#
+# Last Modified: 11-21-2006
+#
+################################################################################
+
+
+################################################################################
+# COMMAND DEFINITIONS
+#
+# SYNTAX:
+#
+# define command{
+# use <templatename>
+# name <objectname>
+# command_name <commandname>
+# command_line <commandline>
+# }
+#
+# WHERE:
+#
+# <templatename> = object name of another command definition that should be
+# used as a template for this definition (optional)
+# <objectname> = object name of command definition, referenced by other
+# command definitions that use it as a template (optional)
+# <commandname> = name of the command, as recognized/used by Nagios
+# <commandline> = command line
+#
+################################################################################
+
+
+
+
+################################################################################
+#
+# SAMPLE SERVICE CHECK COMMANDS
+#
+# These are some example service check commands. They may or may not work on
+# your system, as they must be modified for your plugins. See the HTML
+# documentation on the plugins for examples of how to configure command definitions.
+#
+################################################################################
+
+
+################################################################################
+# NOTE: The following 'check_local_...' functions are designed to monitor
+# various metrics on the host that Nagios is running on (i.e. this one).
+################################################################################
+
+# 'check_local_disk' command definition
+define command{
+ command_name check_local_disk
+ command_line /usr/local/nagios/libexec/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
+ }
+
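+# For reference (not loaded from this file): a service definition passes values
+# into the $ARG1$..$ARG3$ macros above as '!'-separated arguments to its
+# check_command, e.g. with hypothetical thresholds and partition (other
+# required service directives omitted):
+#
+# define service{
+#       host_name               hadesdaq
+#       service_description     ROOT PARTITION
+#       check_command           check_local_disk!20%!10%!/
+#       }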
+
+# 'check_local_load' command definition
+define command{
+ command_name check_local_load
+ command_line /usr/local/nagios/libexec/check_load -w $ARG1$ -c $ARG2$
+ }
+
+
+# 'check_local_procs' command definition
+define command{
+ command_name check_local_procs
+ command_line /usr/local/nagios/libexec/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
+ }
+
+
+# 'check_local_users' command definition
+define command{
+ command_name check_local_users
+ command_line /usr/local/nagios/libexec/check_users -w $ARG1$ -c $ARG2$
+ }
+
+# 'check_local_nmap' command definition
+define command{
+ command_name check_local_nmap
+ command_line /usr/bin/nmap -sT -p22 -P0 localhost| grep open 2> /dev/null
+ }
+
+
+################################################################################
+# NOTE: The following 'check_...' commands are used to monitor services on
+# both local and remote hosts.
+################################################################################
+
+# 'check_dns' command definition
+define command{
+ command_name check_dns
+ command_line /usr/local/nagios/libexec/check_dns -H www.yahoo.com -s $HOSTADDRESS$
+ }
+
+
+# 'check_ftp' command definition
+define command{
+ command_name check_ftp
+ command_line /usr/local/nagios/libexec/check_ftp -H $HOSTADDRESS$
+ }
+
+
+# 'check_hpjd' command definition
+define command{
+ command_name check_hpjd
+ command_line /usr/local/nagios/libexec/check_hpjd -H $HOSTADDRESS$ -C public
+ }
+
+
+# 'check_http' command definition
+define command{
+ command_name check_http
+ command_line /usr/local/nagios/libexec/check_http -H $HOSTADDRESS$
+ }
+
+
+# 'check_nntp' command definition
+define command{
+ command_name check_nntp
+ command_line /usr/local/nagios/libexec/check_nntp -H $HOSTADDRESS$
+ }
+
+
+# 'check_ping' command definition
+define command{
+ command_name check_ping
+ command_line /usr/local/nagios/libexec/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
+ }
+
+
+# 'check_pop' command definition
+define command{
+ command_name check_pop
+ command_line /usr/local/nagios/libexec/check_pop -H $HOSTADDRESS$
+ }
+
+
+# 'check_smtp' command definition
+define command{
+ command_name check_smtp
+ command_line /usr/local/nagios/libexec/check_smtp -H $HOSTADDRESS$
+ }
+
+
+# 'check_tcp' command definition
+define command{
+ command_name check_tcp
+ command_line /usr/local/nagios/libexec/check_tcp -H $HOSTADDRESS$ -p $ARG1$
+ }
+
+
+# 'check_telnet' command definition
+define command{
+ command_name check_telnet
+ command_line /usr/local/nagios/libexec/check_tcp -H $HOSTADDRESS$ -p 23
+ }
+
+
+# 'check_udp' command definition
+define command{
+ command_name check_udp
+ command_line /usr/local/nagios/libexec/check_udp -H $HOSTADDRESS$ -p $ARG1$
+ }
+
+# 'check_ssh' command definition
+define command{
+ command_name check_ssh
+ command_line /usr/local/nagios/libexec/check_ssh -t $ARG1$ $HOSTADDRESS$
+ }
+
+
+
+################################################################################
+#
+# SAMPLE HOST CHECK COMMANDS
+#
+################################################################################
+
+
+# This command checks to see if a host is "alive" by pinging it.
+# A critical error is produced on 100% packet loss or a round trip average
+# time of 5 seconds (5000 ms).
+# Note: Only one ICMP echo packet is sent (determined by the '-p 1' argument)
+
+# 'check-host-alive' command definition
+define command{
+ command_name check-host-alive
+ command_line /usr/local/nagios/libexec/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 1
+ }
+
+
+
+
+################################################################################
+#
+# SAMPLE NOTIFICATION COMMANDS
+#
+# These are some example notification commands. They may or may not work on
+# your system without modification. As an example, some systems will require
+# you to use "/usr/bin/mailx" instead of "/usr/bin/mail" in the commands below.
+#
+################################################################################
+
+
+# 'host-notify-by-email' command definition
+define command{
+ command_name host-notify-by-email
+ command_line /usr/bin/printf "%b" "***** Nagios 2.6 *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
+ }
+
+
+# 'host-notify-by-epager' command definition
+define command{
+ command_name host-notify-by-epager
+ command_line /usr/bin/printf "%b" "Host '$HOSTALIAS$' is $HOSTSTATE$\nInfo: $HOSTOUTPUT$\nTime: $LONGDATETIME$" | /usr/bin/mail -s "$NOTIFICATIONTYPE$ alert - Host $HOSTNAME$ is $HOSTSTATE$" $CONTACTPAGER$
+ }
+
+# 'notify-by-email' command definition
+define command{
+ command_name notify-by-email
+ command_line /usr/bin/printf "%b" "***** Nagios 2.6 *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /usr/bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
+ }
+
+
+# 'notify-by-epager' command definition
+define command{
+ command_name notify-by-epager
+ command_line /usr/bin/printf "%b" "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nInfo: $SERVICEOUTPUT$\nDate: $LONGDATETIME$" | /usr/bin/mail -s "$NOTIFICATIONTYPE$: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$" $CONTACTPAGER$
+ }
+
+
+
+
+
+################################################################################
+#
+# SAMPLE PERFORMANCE DATA COMMANDS
+#
+# These are sample performance data commands that can be used to send performance
+# data output to two text files (one for hosts, another for services). If you
+# plan on simply writing performance data out to a file, consider using the
+# host_perfdata_file and service_perfdata_file options in the main config file.
+#
+################################################################################
+
+
+# 'process-host-perfdata' command definition
+define command{
+ command_name process-host-perfdata
+ command_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out
+ }
+
+
+# 'process-service-perfdata' command definition
+define command{
+ command_name process-service-perfdata
+ command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out
+ }
+
+
+########################################################################
+#
+# Remote host check commands go first
+#
+########################################################################
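+
+# NOTE: all of the '..._by_ssh' commands below use check_by_ssh, which requires
+# non-interactive (key-based) SSH access from the user Nagios runs as on this
+# server to the target host. A quick manual test (run as that user; 'somehost'
+# is only a placeholder):
+#
+#   /usr/local/nagios/libexec/check_by_ssh -H somehost -C "uptime"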
+
+#check_proc_qa-dst_by_ssh
+define command {
+ command_name check_proc_qa-dst_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/misc/hadaq/nagios/nagios-plugins-1.4.5/plugins-scripts/my_check_process_qa-dst.pl $ARG1$ $ARG2$ $ARG3$"
+
+}
+
+#check_proc_by_ssh
+define command {
+ command_name check_proc_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/misc/hadaq/nagios/nagios-plugins-1.4.5/plugins-scripts/my_check_process.pl $ARG1$"
+
+}
+
+#check_proc2_by_ssh
+define command {
+ command_name check_proc2_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/usr/local/nagios/libexec/my_check_process.pl $ARG1$"
+
+}
+
+#check_proc2
+define command {
+ command_name check_proc2
+ command_line /usr/local/nagios/libexec/my_check_process.pl $ARG1$
+}
+
+#check_by_ssh check_disk on lxg0447 (separate command because of the non-default path to check_disk on lxg0447)
+define command {
+ command_name check_disk_by_ssh_lxg0447
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/misc/hadaq/nagios/nagios-plugins-1.4.5/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"
+}
+
+#check_by_ssh check_disk on lxg0451 (separate command because of the non-default path to check_disk on lxg0451)
+define command {
+ command_name check_disk_by_ssh_lxg0451
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/misc/hadaq/nagios/nagios-plugins-1.4.5/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"
+}
+
+#check_by_ssh check_disk
+define command {
+ command_name check_disk_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/usr/local/nagios/libexec/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$"
+}
+
+#check_by_ssh my_check_raid.pl
+define command {
+ command_name check_raid_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/usr/local/nagios/libexec/my_check_raid.pl"
+}
+
+#check_by_ssh check_load
+define command {
+ command_name check_load_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/usr/local/nagios/libexec/check_load -w $ARG1$,$ARG2$,$ARG3$ -c $ARG4$,$ARG5$,$ARG6$"
+}
+
+#check_load
+define command {
+ command_name check_load
+ command_line /usr/local/nagios/libexec/check_load -w $ARG1$,$ARG2$,$ARG3$ -c $ARG4$,$ARG5$,$ARG6$
+}
+
+#my_check_raid.pl
+define command {
+ command_name check_raid
+ command_line /usr/local/nagios/libexec/my_check_raid.pl
+}
+
+#my_check_archivist.pl
+define command {
+ command_name check_archivist
+ command_line /usr/local/nagios/libexec/my_check_archivist.pl $HOSTADDRESS$ $ARG1$
+}
+
+#my_check_disk_smartctl.pl
+define command {
+ command_name check_disk_smartctl
+ command_line /usr/local/nagios/libexec/my_check_disk_smartctl.pl -d $ARG1$ -d $ARG2$
+}
+
+#my_check_disk_smartctl.pl
+define command {
+ command_name check_disk_smartctl_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/usr/local/nagios/libexec/my_check_disk_smartctl.pl $ARG1$ $ARG2$"
+}
+
+#my_check_disk_smartctl.pl
+define command {
+ command_name check_disk_smartctl_temp
+ command_line /usr/local/nagios/libexec/my_check_disk_smartctl.pl -d $ARG1$ -d $ARG2$ -t -w $ARG3$ -c $ARG4$
+}
+
+#my_check_disk_smartctl.pl
+define command {
+ command_name check_disk_smartctl_temp_by_ssh
+ command_line /usr/local/nagios/libexec/check_by_ssh -H $HOSTADDRESS$ -C "/usr/local/nagios/libexec/my_check_disk_smartctl.pl -d $ARG1$ -d $ARG2$ -t -w $ARG3$ -c $ARG4$"
+}
+
+#my_check_dhcp.pl
+define command {
+ command_name check_dhcp
+ command_line /usr/local/nagios/libexec/my_check_dhcp.pl $ARG1$
+}
+
+#my_check_proc_status.pl
+define command {
+ command_name check_proc_status
+ command_line /usr/local/nagios/libexec/my_check_proc_status.pl $HOSTADDRESS$ $ARG1$ $ARG2$
+}
+
+#my_check_eblog_status.pl
+define command {
+ command_name check_eblog
+ command_line /usr/local/nagios/libexec/my_check_eblog_status.pl $HOSTADDRESS$ $ARG1$
+}
+
+#my_epics.sh
+define command {
+ command_name check_epics
+ command_line /usr/local/nagios/libexec/my_epics.sh -pv $ARG1$
+}
\ No newline at end of file
--- /dev/null
+# 'linux-admins' contact group definition
+define contactgroup{
+ contactgroup_name linux-admins
+ alias Linux Administrators
+ members hadaq,Michael,Ingo
+ }
+
+# 'qa-dst-admins' contact group definition
+#define contactgroup{
+# contactgroup_name qa-dst-admins
+# alias online QA/DST Administrators
+# members hadaq,Jacek,Malgorzata
+# }
\ No newline at end of file
--- /dev/null
+# 'hadaq' contact definition (Nagios admin)
+define contact{
+ contact_name hadaq
+ alias Nagios Admin
+ service_notification_period 24x7
+ host_notification_period 24x7
+ service_notification_options w,u,c,r
+ host_notification_options d,u,r
+ service_notification_commands notify-by-email
+ host_notification_commands host-notify-by-email
+ email s.yurevich@gsi.de
+ }
+
+define contact{
+ contact_name Michael
+ alias DAQ Expert
+ service_notification_period 24x7
+ host_notification_period 24x7
+ service_notification_options w,u,c,r
+ host_notification_options d,u,r
+ service_notification_commands notify-by-email
+ host_notification_commands host-notify-by-email
+ email m.traxler@gsi.de
+ }
+
+define contact{
+ contact_name Ingo
+ alias DAQ Expert 2
+ service_notification_period 24x7
+ host_notification_period 24x7
+ service_notification_options w,u,c,r
+ host_notification_options d,u,r
+ service_notification_commands notify-by-email
+ host_notification_commands host-notify-by-email
+ email froehlich@physik.uni-frankfurt.de
+ }
+
+define contact{
+ contact_name Jacek
+ alias QA/DST Expert
+ service_notification_period 24x7
+ host_notification_period 24x7
+ service_notification_options w,u,c,r
+ host_notification_options d,u,r
+ service_notification_commands notify-by-email
+ host_notification_commands host-notify-by-email
+ email otwinow@hades2.if.uj.edu.pl
+ }
+
+define contact{
+ contact_name Malgorzata
+ alias QA/DST Expert 2
+ service_notification_period 24x7
+ host_notification_period 24x7
+ service_notification_options w,u,c,r
+ host_notification_options d,u,r
+ service_notification_commands notify-by-email
+ host_notification_commands host-notify-by-email
+ email M.Sudol@gsi.de
+ }
\ No newline at end of file
--- /dev/null
+# 'vmecpu-group' host group definition
+define hostgroup{
+ hostgroup_name vmecpu-group
+ alias VME CPUS
+# contact_groups <edit-this> ; This needs to be the same value as the value located in service.cfg file. Nagios 2.5 produces an error if you define this.
+ members hadc01,hadc02,hadc03,hadc04,hadc05,hadc06,hadc07,hadc08,hadc09,hadc10,hadc11,hadc12,hadc13,hadc14,hadc15,hadc16,hadc17;
+ }
+
+# 'hadeb-group' host group definition
+define hostgroup{
+ hostgroup_name hadeb-group
+ alias EB Servers
+# contact_groups <edit-this> ; This needs to be the same value as the value located in service.cfg file. Nagios 2.5 produces an error if you define this.
+ members hadeb01,hadeb03,hadeb04,hadeb05,hadeb06a,hadeb07,lxhadesdaq
+ }
+
+# lxg-hosts group definition
+define hostgroup{
+ hostgroup_name lxg-group
+ alias lxg hosts
+# contact_groups <edit-this> ; This needs to be the same value as the value located in service.cfg file. Nagios 2.5 produces an error if you define this.
+ members lxg0447,lxg0411,lxg0451,lxg0434,lxg0440,lxg0441,lxg0442,lxg0443,lxg0444,lxg0430,lxg0438,lxg0449,lxg0450
+ }
+
+# hades-hosts group definition
+define hostgroup{
+ hostgroup_name hades-group
+ alias hades hosts
+# contact_groups <edit-this> ; This needs to be the same value as the value located in service.cfg file. Nagios 2.5 produces an error if you define this.
+ members hades25,hades17,hades27
+ }
+
+# scs-hosts group definition
+define hostgroup{
+ hostgroup_name scs-group
+ alias scs hosts
+# contact_groups <edit-this> ; This needs to be the same value as the value located in service.cfg file. Nagios 2.5 produces an error if you define this.
+ members hadsc1
+ }
--- /dev/null
+# Generic host definition template
+define host{
+ name generic-host ; The name of this host template
+ notifications_enabled 1 ; Host notifications are enabled
+ event_handler_enabled 1 ; Host event handler is enabled
+ flap_detection_enabled 1 ; Flap detection is enabled
+ process_perf_data 1 ; Process performance data
+ retain_status_information 1 ; Retain status information
+ retain_nonstatus_information 1 ; Retain non-status information
+
+ register 0 ; DONT REGISTER, JUST A TEMPLATE!
+ }
+
+define host{
+ name vme-cpu ; The name of this host template
+ notifications_enabled 1 ; Host notifications are enabled
+ event_handler_enabled 1 ; Host event handler is enabled
+ flap_detection_enabled 1 ; Flap detection is enabled
+ process_perf_data 1 ; Process performance data
+ retain_status_information 1 ; Retain status information
+ retain_nonstatus_information 1 ; Retain non-status information
+ check_command check-host-alive
+ check_period 24x7
+ contact_groups linux-admins
+ max_check_attempts 5
+ notification_interval 120
+ notification_period 24x7
+ notification_options d,u,r
+
+ register 0 ; DONT REGISTER, JUST A TEMPLATE!
+ }
+
+define host{
+ name hadeb-host ; The name of this host template
+ notifications_enabled 1 ; Host notifications are enabled
+ event_handler_enabled 1 ; Host event handler is enabled
+ flap_detection_enabled 1 ; Flap detection is enabled
+ process_perf_data 1 ; Process performance data
+ retain_status_information 1 ; Retain status information
+ retain_nonstatus_information 1 ; Retain non-status information
+ check_command check-host-alive
+ check_period 24x7
+ contact_groups linux-admins
+ max_check_attempts 5
+ notification_interval 120
+ notification_period 24x7
+ notification_options d,u,r
+
+ register 0 ; DONT REGISTER, JUST A TEMPLATE!
+ }
+
+define host{
+ name lxg-host ; The name of this host template
+ notifications_enabled 1 ; Host notifications are enabled
+ event_handler_enabled 1 ; Host event handler is enabled
+ flap_detection_enabled 1 ; Flap detection is enabled
+ process_perf_data 1 ; Process performance data
+ retain_status_information 1 ; Retain status information
+ retain_nonstatus_information 1 ; Retain non-status information
+ check_command check-host-alive
+ check_period 24x7
+ contact_groups linux-admins
+ max_check_attempts 5
+ notification_interval 120
+ notification_period 24x7
+ notification_options d,u,r
+
+ register 0 ; DONT REGISTER, JUST A TEMPLATE!
+ }
+
+define host{
+ name hades-host ; The name of this host template
+ notifications_enabled 1 ; Host notifications are enabled
+ event_handler_enabled 1 ; Host event handler is enabled
+ flap_detection_enabled 1 ; Flap detection is enabled
+ process_perf_data 1 ; Process performance data
+ retain_status_information 1 ; Retain status information
+ retain_nonstatus_information 1 ; Retain non-status information
+ check_command check-host-alive
+ check_period 24x7
+ contact_groups linux-admins
+ max_check_attempts 5
+ notification_interval 120
+ notification_period 24x7
+ notification_options d,u,r
+
+ register 0 ; DONT REGISTER, JUST A TEMPLATE!
+ }
+
+define host{
+ name scs-host ; The name of this host template
+ notifications_enabled 1 ; Host notifications are enabled
+ event_handler_enabled 1 ; Host event handler is enabled
+ flap_detection_enabled 1 ; Flap detection is enabled
+ process_perf_data 1 ; Process performance data
+ retain_status_information 1 ; Retain status information
+ retain_nonstatus_information 1 ; Retain non-status information
+ check_command check-host-alive
+ check_period 24x7
+ contact_groups linux-admins
+ max_check_attempts 5
+ notification_interval 120
+ notification_period 24x7
+ notification_options d,u,r
+
+ register 0 ; DONT REGISTER, JUST A TEMPLATE!
+ }
+
+# 'localhost' host definition
+define host{
+ name localhost
+ use generic-host ; Name of host template to use
+ host_name hadesdaq
+ alias nagios server
+ address 127.0.0.1
+ check_command check-host-alive
+ check_period 24x7
+ contact_groups linux-admins
+ max_check_attempts 5
+ notification_interval 120
+ notification_period 24x7
+ notification_options d,u,r
+ register 1
+ }
+
+# hadeb01 host definition
+define host{
+ name hadeb01
+ use hadeb-host ; Name of host template to use
+ host_name hadeb01
+ alias old eb server
+ address 140.181.96.30
+ register 1
+ }
+
+# hadeb03 host definition
+define host{
+ name hadeb03
+ use hadeb-host ; Name of host template to use
+ host_name hadeb03
+ alias backup server
+ address 140.181.97.118
+ register 1
+ }
+
+# hadeb04 host definition
+define host{
+ name hadeb04
+ use hadeb-host ; Name of host template to use
+ host_name hadeb04
+ alias server
+ address 140.181.83.152
+ register 1
+ }
+
+# hadeb05 host definition
+define host{
+ name hadeb05
+ use hadeb-host ; Name of host template to use
+ host_name hadeb05
+ alias server
+ address 140.181.93.18
+ register 1
+ }
+
+# hadeb06 host definition
+define host{
+ name hadeb06a
+ use hadeb-host ; Name of host template to use
+ host_name hadeb06a
+ alias server
+ address 140.181.93.112
+ register 1
+ }
+
+#hadeb07
+define host{
+ name hadeb07
+ use hadeb-host ; Name of host template to use
+ host_name hadeb07
+ alias backup server
+ address 140.181.103.216
+ register 1
+ }
+
+#lxhadesdaq host definition
+define host{
+ name lxhadesdaq
+ use generic-host ; Name of host template to use
+ host_name lxhadesdaq
+ alias main server
+ address 140.181.75.158
+ check_command check-host-alive
+ check_period 24x7 ; new
+ contact_groups linux-admins ; new
+ max_check_attempts 5
+ notification_interval 120
+ notification_period 24x7
+ notification_options d,u,r
+ register 1
+ }
+
+#hadc01
+define host{
+ name hadc01
+ use vme-cpu ; Name of host template to use
+ host_name hadc01
+ alias ---
+ address 140.181.82.98
+ register 1
+ }
+
+#hadc02
+define host{
+ name hadc02
+ use vme-cpu ; Name of host template to use
+ host_name hadc02
+ alias MDC-1 readout in cave
+ address 140.181.84.20
+ register 1
+ }
+
+#hadc03
+define host{
+ name hadc03
+ use vme-cpu ; Name of host template to use
+ host_name hadc03
+ alias TOF-0 readout in cave
+ address 140.181.87.78
+ register 1
+ }
+
+#hadc04
+define host{
+ name hadc04
+ use vme-cpu ; Name of host template to use
+ host_name hadc04
+ alias TOF-1 readout in cave
+ address 140.181.87.80
+ register 1
+ }
+
+#hadc05
+define host{
+ name hadc05
+ use vme-cpu ; Name of host template to use
+ host_name hadc05
+ alias TOF-2 readout in cave
+ address 140.181.87.82
+ register 1
+ }
+
+#hadc06
+define host{
+ name hadc06
+ use vme-cpu ; Name of host template to use
+ host_name hadc06
+ alias TOF-3 readout in cave
+ address 140.181.87.84
+ register 1
+ }
+
+#hadc07
+define host{
+ name hadc07
+ use vme-cpu ; Name of host template to use
+ host_name hadc07
+ alias TOF-4 readout in cave
+ address 140.181.87.86
+ register 1
+ }
+
+#hadc08
+define host{
+ name hadc08
+ use vme-cpu ; Name of host template to use
+ host_name hadc08
+ alias Matching Unit
+ address 140.181.87.88
+ register 1
+ }
+
+#hadc09
+define host{
+ name hadc09
+ use vme-cpu ; Name of host template to use
+ host_name hadc09
+ alias Ingos lab in Frankfurt
+ address 140.181.87.90
+ register 1
+ }
+
+#hadc10
+define host{
+ name hadc10
+ use vme-cpu ; Name of host template to use
+ host_name hadc10
+ alias Shower
+ address 140.181.87.92
+ register 1
+ }
+
+#hadc11
+define host{
+ name hadc11
+ use vme-cpu ; Name of host template to use
+ host_name hadc11
+ alias RICH1, Torte TU-Munchen
+ address 140.181.87.94
+ register 1
+ }
+
+#hadc12
+define host{
+ name hadc12
+ use vme-cpu ; Name of host template to use
+ host_name hadc12
+ alias RICH1
+ address 140.181.87.96
+ register 1
+ }
+
+#hadc13
+define host{
+ name hadc13
+ use vme-cpu ; Name of host template to use
+ host_name hadc13
+ alias RICH2
+ address 140.181.87.98
+ register 1
+ }
+
+#hadc14
+define host{
+ name hadc14
+ use vme-cpu ; Name of host template to use
+ host_name hadc14
+ alias RICH3
+ address 140.181.87.100
+ register 1
+ }
+
+#hadc15
+define host{
+ name hadc15
+ use vme-cpu ; Name of host template to use
+ host_name hadc15
+ alias MDC-0 readout in cave
+ address 140.181.87.102
+ register 1
+ }
+
+#hadc16
+define host{
+ name hadc16
+ use vme-cpu ; Name of host template to use
+ host_name hadc16
+ alias EE-Lab, GSI
+ address 140.181.87.104
+ register 1
+ }
+
+#hadc17
+define host{
+ name hadc17
+ use vme-cpu ; Name of host template to use
+ host_name hadc17
+ alias Lab in Giessen, Tiago
+ address 140.181.87.106
+ register 1
+ }
+
+#lxg0411
+define host{
+ name lxg0411
+ use lxg-host ; Name of host template to use
+ host_name lxg0411
+ alias QA Server (Go4)
+ address 140.181.74.222
+ register 1
+ }
+
+#lxg0447
+define host{
+ name lxg0447
+ use lxg-host ; Name of host template to use
+ host_name lxg0447
+ alias QA RAM-Disk
+ address 140.181.92.234
+ register 1
+ }
+
+#lxg0430
+define host{
+ name lxg0430
+ use lxg-host ; Name of host template to use
+ host_name lxg0430
+ alias online DST
+ address 140.181.67.145
+ register 1
+ }
+
+#lxg0434
+define host{
+ name lxg0434
+ use lxg-host ; Name of host template to use
+ host_name lxg0434
+ alias EPICS Oracle
+ address 140.181.84.32
+ register 1
+ }
+
+#lxg0438
+define host{
+ name lxg0438
+ use lxg-host ; Name of host template to use
+ host_name lxg0438
+ alias Rossendorf PC
+ address 140.181.84.40
+ register 1
+ }
+
+
+#lxg0440
+define host{
+ name lxg0440
+ use lxg-host ; Name of host template to use
+ host_name lxg0440
+ alias RICH acc PC
+ address 140.181.92.220
+ register 1
+ }
+
+#lxg0441
+define host{
+ name lxg0441
+ use lxg-host ; Name of host template to use
+ host_name lxg0441
+ alias MDC acc PC
+ address 140.181.92.222
+ register 1
+ }
+
+#lxg0442
+define host{
+ name lxg0442
+ use lxg-host ; Name of host template to use
+ host_name lxg0442
+ alias Start/Veto/Trigger acc PC
+ address 140.181.92.224
+ register 1
+ }
+
+#lxg0443
+define host{
+ name lxg0443
+ use lxg-host ; Name of host template to use
+ host_name lxg0443
+ alias TOF/TOFino acc PC
+ address 140.181.92.226
+ register 1
+ }
+
+#lxg0444
+define host{
+ name lxg0444
+ use lxg-host ; Name of host template to use
+ host_name lxg0444
+ alias Shower acc PC
+ address 140.181.92.228
+ register 1
+ }
+
+#lxg0449
+define host{
+ name lxg0449
+ use lxg-host ; Name of host template to use
+ host_name lxg0449
+ alias pc in cave
+ address 140.181.102.238
+ register 1
+ }
+
+#lxg0450
+define host{
+ name lxg0450
+ use lxg-host ; Name of host template to use
+ host_name lxg0450
+ alias pc in cave
+ address 140.181.102.240
+ register 1
+ }
+
+#lxg0451
+define host{
+ name lxg0451
+ use lxg-host ; Name of host template to use
+ host_name lxg0451
+ alias online DST PC1
+ address 140.181.103.214
+ register 1
+ }
+
+#hades25
+define host{
+ name hades25
+ use hades-host ; Name of host template to use
+ host_name hades25
+ alias Slow Control System
+ address 140.181.107.26
+ register 1
+ }
+
+#hades17 (used to be hades26)
+define host{
+ name hades17
+ use hades-host ; Name of host template to use
+ host_name hades17
+ alias - System
+ address 140.181.100.181 ; used to be 140.181.107.28 (hades26)
+ register 1
+ }
+
+#hades27
+define host{
+ name hades27
+ use hades-host ; Name of host template to use
+ host_name hades27
+ alias - System
+ address 140.181.107.30
+ register 1
+ }
+
+#hadsc1
+define host{
+ name hadsc1
+ use scs-host ; Name of host template to use
+ host_name hadsc1
+ alias - System
+ address 140.181.111.196
+ register 1
+ }
--- /dev/null
+##############################################################################
+#
+# NAGIOS.CFG - Sample Main Config File for Nagios 2.6
+#
+# Read the documentation for more information on this configuration
+# file. I've provided some comments here, but things may not be so
+# clear without further explanation.
+#
+# Last Modified: 11-21-2006
+#
+##############################################################################
+
+
+# LOG FILE
+# This is the main log file where service and host events are logged
+# for historical purposes. This should be the first option specified
+# in the config file!!!
+
+#log_file=/usr/local/nagios/var/nagios.log
+log_file=/var/log/nagios/nagios.log
+
+# OBJECT CONFIGURATION FILE(S)
+# This is the configuration file in which you define hosts, host
+# groups, contacts, contact groups, services, etc. I guess it would
+# be better called an object definition file, but for historical
+# reasons it isn't. You can split object definitions into several
+# different config files by using multiple cfg_file statements here.
+# Nagios will read and process all the config files you define.
+# This can be very useful if you want to keep command definitions
+# separate from host and contact definitions...
+
+# Command definitions
+cfg_file=/usr/local/nagios/etc/commands.cfg
+
+# Host and service definitions for monitoring this machine
+#cfg_file=/usr/local/nagios/etc/localhost.cfg
+
+
+# You can split other types of object definitions across several
+# config files if you wish (as done here), or keep them all in a
+# single config file.
+
+cfg_file=/usr/local/nagios/etc/contactgroups.cfg
+cfg_file=/usr/local/nagios/etc/contacts.cfg
+#cfg_file=/usr/local/nagios/etc/dependencies.cfg
+#cfg_file=/usr/local/nagios/etc/escalations.cfg
+cfg_file=/usr/local/nagios/etc/hostgroups.cfg
+cfg_file=/usr/local/nagios/etc/hosts.cfg
+cfg_file=/usr/local/nagios/etc/services.cfg
+#cfg_file=/usr/local/nagios/etc/services_qadst.cfg
+cfg_file=/usr/local/nagios/etc/servicegroups.cfg
+cfg_file=/usr/local/nagios/etc/timeperiods.cfg
+
+# Extended host/service info definitions are now stored along with
+# other object definitions:
+#cfg_file=/usr/local/nagios/etc/hostextinfo.cfg
+#cfg_file=/usr/local/nagios/etc/serviceextinfo.cfg
+
+# You can also tell Nagios to process all config files (with a .cfg
+# extension) in a particular directory by using the cfg_dir
+# directive as shown below:
+
+#cfg_dir=/usr/local/nagios/etc/servers
+#cfg_dir=/usr/local/nagios/etc/printers
+#cfg_dir=/usr/local/nagios/etc/switches
+#cfg_dir=/usr/local/nagios/etc/routers
+
+
+
+# OBJECT CACHE FILE
+# This option determines where object definitions are cached when
+# Nagios starts/restarts. The CGIs read object definitions from
+# this cache file (rather than looking at the object config files
+# directly) in order to prevent inconsistencies that can occur
+# when the config files are modified after Nagios starts.
+
+object_cache_file=/usr/local/nagios/var/objects.cache
+
+
+
+# RESOURCE FILE
+# This is an optional resource file that contains $USERx$ macro
+# definitions. Multiple resource files can be specified by using
+# multiple resource_file definitions. The CGIs will not attempt to
+# read the contents of resource files, so information that is
+# considered to be sensitive (usernames, passwords, etc) can be
+# defined as macros in this file and restrictive permissions (600)
+# can be placed on this file.
+
+resource_file=/usr/local/nagios/etc/resource.cfg
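+
+# For example, resource.cfg commonly defines the plugin directory once as a
+# $USERx$ macro (a convention, not something this setup requires):
+#
+#   $USER1$=/usr/local/nagios/libexec
+#
+# Command definitions could then reference plugins as $USER1$/check_ping etc.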
+
+
+
+# STATUS FILE
+# This is where the current status of all monitored services and
+# hosts is stored. Its contents are read and processed by the CGIs.
+# The contents of the status file are deleted every time Nagios
+# restarts.
+
+#status_file=/usr/local/nagios/var/status.dat
+status_file=/var/log/nagios/status.dat
+
+
+# NAGIOS USER
+# This determines the effective user that Nagios should run as.
+# You can either supply a username or a UID.
+
+nagios_user=hadaq
+
+
+
+# NAGIOS GROUP
+# This determines the effective group that Nagios should run as.
+# You can either supply a group name or a GID.
+
+nagios_group=users
+
+
+
+# EXTERNAL COMMAND OPTION
+# This option allows you to specify whether or not Nagios should check
+# for external commands (in the command file defined below). By default
+# Nagios will *not* check for external commands, just to be on the
+# cautious side. If you want to be able to use the CGI command interface
+# you will have to enable this. Setting this value to 0 disables command
+# checking (the default), other values enable it.
+
+check_external_commands=1
+
+
+
+# EXTERNAL COMMAND CHECK INTERVAL
+# This is the interval at which Nagios should check for external commands.
+# This value works off of the interval_length you specify later. If you leave
+# that at its default value of 60 (seconds), a value of 1 here will cause
+# Nagios to check for external commands every minute. If you specify a
+# number followed by an "s" (i.e. 15s), this will be interpreted to mean
+# actual seconds rather than a multiple of the interval_length variable.
+# Note: In addition to reading the external command file at regularly
+# scheduled intervals, Nagios will also check for external commands after
+# event handlers are executed.
+# NOTE: Setting this value to -1 causes Nagios to check the external
+# command file as often as possible.
+
+#command_check_interval=15s
+command_check_interval=-1
+
+
+
+# EXTERNAL COMMAND FILE
+# This is the file that Nagios checks for external command requests.
+# It is also where the command CGI will write commands that are submitted
+# by users, so it must be writeable by the user that the web server
+# is running as (usually 'nobody'). Permissions should be set at the
+# directory level instead of on the file, as the file is deleted every
+# time its contents are processed.
+
+command_file=/usr/local/nagios/var/rw/nagios.cmd
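+
+# One common way to set up those directory-level permissions (this assumes a
+# dedicated 'nagcmd' group containing both the user Nagios runs as and the web
+# server user - adjust the names to your system):
+#
+#   mkdir -p /usr/local/nagios/var/rw
+#   chgrp nagcmd /usr/local/nagios/var/rw
+#   chmod g+rwxs /usr/local/nagios/var/rw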
+
+
+
+# COMMENT FILE
+# This is the file that Nagios will use for storing host and service
+# comments.
+
+#comment_file=/usr/local/nagios/var/comments.dat
+comment_file=/var/log/nagios/comments.dat
+
+
+# DOWNTIME FILE
+# This is the file that Nagios will use for storing host and service
+# downtime data.
+
+#downtime_file=/usr/local/nagios/var/downtime.dat
+downtime_file=/var/log/nagios/downtime.dat
+
+
+# LOCK FILE
+# This is the lockfile that Nagios will use to store its PID number
+# in when it is running in daemon mode.
+
+#lock_file=/usr/local/nagios/var/nagios.lock
+lock_file=/var/log/nagios/nagios.lock
+
+
+# TEMP FILE
+# This is a temporary file that is used as scratch space when Nagios
+# updates the status log, cleans the comment file, etc. This file
+# is created, used, and deleted throughout the time that Nagios is
+# running.
+
+temp_file=/usr/local/nagios/var/nagios.tmp
+
+
+
+# EVENT BROKER OPTIONS
+# Controls what (if any) data gets sent to the event broker.
+# Values: 0 = Broker nothing
+# -1 = Broker everything
+# <other> = See documentation
+
+event_broker_options=-1
+
+
+
+# EVENT BROKER MODULE(S)
+# This directive is used to specify an event broker module that should
+# be loaded by Nagios at startup. Use multiple directives if you want
+# to load more than one module. Arguments that should be passed to
+# the module at startup are separated from the module path by a space.
+#
+# Example:
+#
+# broker_module=<modulepath> [moduleargs]
+
+#broker_module=/somewhere/module1.o
+#broker_module=/somewhere/module2.o arg1 arg2=3 debug=0
+
+
+
+
+# LOG ROTATION METHOD
+# This is the log rotation method that Nagios should use to rotate
+# the main log file. Values are as follows..
+# n = None - don't rotate the log
+# h = Hourly rotation (top of the hour)
+# d = Daily rotation (midnight every day)
+# w = Weekly rotation (midnight on Saturday evening)
+# m = Monthly rotation (midnight last day of month)
+
+log_rotation_method=d
+
+
+
+# LOG ARCHIVE PATH
+# This is the directory where archived (rotated) log files should be
+# placed (assuming you've chosen to do log rotation).
+
+log_archive_path=/usr/local/nagios/var/archives
+
+
+
+# LOGGING OPTIONS
+# If you want messages logged to the syslog facility, as well as the
+# Nagios log file, set this option to 1. If not, set it to 0.
+
+use_syslog=0
+
+
+
+# NOTIFICATION LOGGING OPTION
+# If you don't want notifications to be logged, set this value to 0.
+# If notifications should be logged, set the value to 1.
+
+log_notifications=0
+
+
+
+# SERVICE RETRY LOGGING OPTION
+# If you don't want service check retries to be logged, set this value
+# to 0. If retries should be logged, set the value to 1.
+
+log_service_retries=1
+
+
+
+# HOST RETRY LOGGING OPTION
+# If you don't want host check retries to be logged, set this value to
+# 0. If retries should be logged, set the value to 1.
+
+log_host_retries=1
+
+
+
+# EVENT HANDLER LOGGING OPTION
+# If you don't want host and service event handlers to be logged, set
+# this value to 0. If event handlers should be logged, set the value
+# to 1.
+
+log_event_handlers=1
+
+
+
+# INITIAL STATES LOGGING OPTION
+# If you want Nagios to log all initial host and service states to
+# the main log file (the first time the service or host is checked)
+# you can enable this option by setting this value to 1. If you
+# are not using an external application that does long term state
+# statistics reporting, you do not need to enable this option. In
+# this case, set the value to 0.
+
+log_initial_states=0
+
+
+
+# EXTERNAL COMMANDS LOGGING OPTION
+# If you don't want Nagios to log external commands, set this value
+# to 0. If external commands should be logged, set this value to 1.
+# Note: This option does not include logging of passive service
+# checks - see the option below for controlling whether or not
+# passive checks are logged.
+
+log_external_commands=1
+
+
+
+# PASSIVE CHECKS LOGGING OPTION
+# If you don't want Nagios to log passive host and service checks, set
+# this value to 0. If passive checks should be logged, set
+# this value to 1.
+
+log_passive_checks=1
+
+
+
+# GLOBAL HOST AND SERVICE EVENT HANDLERS
+# These options allow you to specify a host and service event handler
+# command that is to be run for every host or service state change.
+# The global event handler is executed immediately prior to the event
+# handler that you have optionally specified in each host or
+# service definition. The command argument is the short name of a
+# command definition that you define in your host configuration file.
+# Read the HTML docs for more information.
+
+#global_host_event_handler=somecommand
+#global_service_event_handler=somecommand
+
+
+
+# SERVICE INTER-CHECK DELAY METHOD
+# This is the method that Nagios should use when initially
+# "spreading out" service checks when it starts monitoring. The
+# default is to use smart delay calculation, which will try to
+# space all service checks out evenly to minimize CPU load.
+# Using the dumb setting will cause all checks to be scheduled
+# at the same time (with no delay between them)! This is not a
+# good thing for production, but is useful when testing the
+# parallelization functionality.
+# n = None - don't use any delay between checks
+# d = Use a "dumb" delay of 1 second between checks
+# s = Use "smart" inter-check delay calculation
+# x.xx = Use an inter-check delay of x.xx seconds
+
+service_inter_check_delay_method=s
+
+
+
+# MAXIMUM SERVICE CHECK SPREAD
+# This variable determines the timeframe (in minutes) from the
+# program start time that an initial check of all services should
+# be completed. Default is 30 minutes.
+
+max_service_check_spread=30
+
+
+
+# SERVICE CHECK INTERLEAVE FACTOR
+# This variable determines how service checks are interleaved.
+# Interleaving the service checks allows for a more even
+# distribution of service checks and reduced load on remote
+# hosts. Setting this value to 1 is equivalent to how versions
+# of Nagios previous to 0.0.5 did service checks. Set this
+# value to s (smart) for automatic calculation of the interleave
+# factor unless you have a specific reason to change it.
+# s = Use "smart" interleave factor calculation
+# x = Use an interleave factor of x, where x is a
+# number greater than or equal to 1.
+
+service_interleave_factor=s
+
+
+
+# HOST INTER-CHECK DELAY METHOD
+# This is the method that Nagios should use when initially
+# "spreading out" host checks when it starts monitoring. The
+# default is to use smart delay calculation, which will try to
+# space all host checks out evenly to minimize CPU load.
+# Using the dumb setting will cause all checks to be scheduled
+# at the same time (with no delay between them)!
+# n = None - don't use any delay between checks
+# d = Use a "dumb" delay of 1 second between checks
+# s = Use "smart" inter-check delay calculation
+# x.xx = Use an inter-check delay of x.xx seconds
+
+host_inter_check_delay_method=s
+
+
+
+# MAXIMUM HOST CHECK SPREAD
+# This variable determines the timeframe (in minutes) from the
+# program start time that an initial check of all hosts should
+# be completed. Default is 30 minutes.
+
+max_host_check_spread=30
+
+
+
+# MAXIMUM CONCURRENT SERVICE CHECKS
+# This option allows you to specify the maximum number of
+# service checks that can be run in parallel at any given time.
+# Specifying a value of 1 for this variable essentially prevents
+# any service checks from being parallelized. A value of 0
+# will not restrict the number of concurrent checks that are
+# being executed.
+
+max_concurrent_checks=0
+
+
+
+# SERVICE CHECK REAPER FREQUENCY
+# This is the frequency (in seconds!) that Nagios will process
+# the results of services that have been checked.
+
+service_reaper_frequency=10
+
+
+
+
+# AUTO-RESCHEDULING OPTION
+# This option determines whether or not Nagios will attempt to
+# automatically reschedule active host and service checks to
+# "smooth" them out over time. This can help balance the load on
+# the monitoring server.
+# WARNING: THIS IS AN EXPERIMENTAL FEATURE - IT CAN DEGRADE
+# PERFORMANCE, RATHER THAN INCREASE IT, IF USED IMPROPERLY
+
+auto_reschedule_checks=0
+
+
+
+# AUTO-RESCHEDULING INTERVAL
+# This option determines how often (in seconds) Nagios will
+# attempt to automatically reschedule checks. This option only
+# has an effect if the auto_reschedule_checks option is enabled.
+# Default is 30 seconds.
+# WARNING: THIS IS AN EXPERIMENTAL FEATURE - IT CAN DEGRADE
+# PERFORMANCE, RATHER THAN INCREASE IT, IF USED IMPROPERLY
+
+auto_rescheduling_interval=30
+
+
+
+
+# AUTO-RESCHEDULING WINDOW
+# This option determines the "window" of time (in seconds) that
+# Nagios will look at when automatically rescheduling checks.
+# Only host and service checks that occur in the next X seconds
+# (determined by this variable) will be rescheduled. This option
+# only has an effect if the auto_reschedule_checks option is
+# enabled. Default is 180 seconds (3 minutes).
+# WARNING: THIS IS AN EXPERIMENTAL FEATURE - IT CAN DEGRADE
+# PERFORMANCE, RATHER THAN INCREASE IT, IF USED IMPROPERLY
+
+auto_rescheduling_window=180
+
+
+
+# SLEEP TIME
+# This is the number of seconds to sleep between checking for system
+# events and service checks that need to be run.
+
+sleep_time=0.25
+
+
+
+# TIMEOUT VALUES
+# These options control how much time Nagios will allow various
+# types of commands to execute before killing them off. Options
+# are available for controlling maximum time allotted for
+# service checks, host checks, event handlers, notifications, the
+# ocsp command, and performance data commands. All values are in
+# seconds.
+
+service_check_timeout=60
+host_check_timeout=30
+event_handler_timeout=30
+notification_timeout=30
+ocsp_timeout=5
+perfdata_timeout=5
+
+
+
+# RETAIN STATE INFORMATION
+# This setting determines whether or not Nagios will save state
+# information for services and hosts before it shuts down. Upon
+# startup Nagios will reload all saved service and host state
+# information before starting to monitor. This is useful for
+# maintaining long-term data on state statistics, etc, but will
+# slow Nagios down a bit when it (re)starts. Since it's only
+# a one-time penalty, I think it's well worth the additional
+# startup delay.
+
+retain_state_information=1
+
+
+
+# STATE RETENTION FILE
+# This is the file that Nagios should use to store host and
+# service state information before it shuts down. The state
+# information in this file is also read immediately prior to
+# starting to monitor the network when Nagios is restarted.
+# This file is used only if the retain_state_information
+# variable is set to 1.
+
+state_retention_file=/usr/local/nagios/var/retention.dat
+
+
+
+# RETENTION DATA UPDATE INTERVAL
+# This setting determines how often (in minutes) that Nagios
+# will automatically save retention data during normal operation.
+# If you set this value to 0, Nagios will not save retention
+# data at regular intervals, but it will still save retention
+# data before shutting down or restarting. If you have disabled
+# state retention, this option has no effect.
+
+retention_update_interval=60
+
+
+
+# USE RETAINED PROGRAM STATE
+# This setting determines whether or not Nagios will set
+# program status variables based on the values saved in the
+# retention file. If you want to use retained program status
+# information, set this value to 1. If not, set this value
+# to 0.
+
+use_retained_program_state=1
+
+
+
+# USE RETAINED SCHEDULING INFO
+# This setting determines whether or not Nagios will retain
+# the scheduling info (next check time) for hosts and services
+# based on the values saved in the retention file. If you
+# want to use retained scheduling info, set this
+# value to 1. If not, set this value to 0.
+
+use_retained_scheduling_info=0
+
+
+
+# INTERVAL LENGTH
+# This is the seconds per unit interval as used in the
+# host/contact/service configuration files. Setting this to 60 means
+# that each interval is one minute long (60 seconds). Other settings
+# have not been tested much, so your mileage is likely to vary...
+
+interval_length=60
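+
+# (With interval_length=60, a value such as 'notification_interval  120' in a
+# host or service definition corresponds to 120 * 60 seconds = 2 hours.)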
+
+
+
+# AGGRESSIVE HOST CHECKING OPTION
+# If you don't want to turn on aggressive host checking features, set
+# this value to 0 (the default). Otherwise set this value to 1 to
+# enable the aggressive check option. Read the docs for more info
+# on what aggressive host check is or check out the source code in
+# base/checks.c
+
+use_aggressive_host_checking=0
+
+
+
+# SERVICE CHECK EXECUTION OPTION
+# This determines whether or not Nagios will actively execute
+# service checks when it initially starts. If this option is
+# disabled, checks are not actively made, but Nagios can still
+# receive and process passive check results that come in. Unless
+# you're implementing redundant hosts or have a special need for
+# disabling the execution of service checks, leave this enabled!
+# Values: 1 = enable checks, 0 = disable checks
+
+execute_service_checks=1
+
+
+
+# PASSIVE SERVICE CHECK ACCEPTANCE OPTION
+# This determines whether or not Nagios will accept passive
+# service checks results when it initially (re)starts.
+# Values: 1 = accept passive checks, 0 = reject passive checks
+
+accept_passive_service_checks=1
+
+
+
+# HOST CHECK EXECUTION OPTION
+# This determines whether or not Nagios will actively execute
+# host checks when it initially starts. If this option is
+# disabled, checks are not actively made, but Nagios can still
+# receive and process passive check results that come in. Unless
+# you're implementing redundant hosts or have a special need for
+# disabling the execution of host checks, leave this enabled!
+# Values: 1 = enable checks, 0 = disable checks
+
+execute_host_checks=1
+
+
+
+# PASSIVE HOST CHECK ACCEPTANCE OPTION
+# This determines whether or not Nagios will accept passive
+# host checks results when it initially (re)starts.
+# Values: 1 = accept passive checks, 0 = reject passive checks
+
+accept_passive_host_checks=1
+
+
+
+# NOTIFICATIONS OPTION
+# This determines whether or not Nagios will send out any host or
+# service notifications when it is initially (re)started.
+# Values: 1 = enable notifications, 0 = disable notifications
+
+enable_notifications=0
+
+
+
+# EVENT HANDLER USE OPTION
+# This determines whether or not Nagios will run any host or
+# service event handlers when it is initially (re)started. Unless
+# you're implementing redundant hosts, leave this option enabled.
+# Values: 1 = enable event handlers, 0 = disable event handlers
+
+enable_event_handlers=1
+
+
+
+# PROCESS PERFORMANCE DATA OPTION
+# This determines whether or not Nagios will process performance
+# data returned from service and host checks. If this option is
+# enabled, host performance data will be processed using the
+# host_perfdata_command (defined below) and service performance
+# data will be processed using the service_perfdata_command (also
+# defined below). Read the HTML docs for more information on
+# performance data.
+# Values: 1 = process performance data, 0 = do not process performance data
+
+process_performance_data=0
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA PROCESSING COMMANDS
+# These commands are run after every host and service check is
+# performed. These commands are executed only if the
+# enable_performance_data option (above) is set to 1. The command
+# argument is the short name of a command definition that you
+# define in your host configuration file. Read the HTML docs for
+# more information on performance data.
+
+#host_perfdata_command=process-host-perfdata
+#service_perfdata_command=process-service-perfdata
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILES
+# These files are used to store host and service performance data.
+# Performance data is only written to these files if the
+# enable_performance_data option (above) is set to 1.
+
+#host_perfdata_file=/tmp/host-perfdata
+#service_perfdata_file=/tmp/service-perfdata
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE TEMPLATES
+# These options determine what data is written (and how) to the
+# performance data files. The templates may contain macros, special
+# characters (\t for tab, \r for carriage return, \n for newline)
+# and plain text. A newline is automatically added after each write
+# to the performance data file. Some examples of what you can do are
+# shown below.
+
+#host_perfdata_file_template=[HOSTPERFDATA]\t$TIMET$\t$HOSTNAME$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$
+#service_perfdata_file_template=[SERVICEPERFDATA]\t$TIMET$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$
+
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE MODES
+# This option determines whether or not the host and service
+# performance data files are opened in write ("w") or append ("a")
+# mode. Unless the files are named pipes, you will probably
+# want to use the default mode of append ("a").
+
+#host_perfdata_file_mode=a
+#service_perfdata_file_mode=a
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING INTERVAL
+# These options determine how often (in seconds) the host and service
+# performance data files are processed using the commands defined
+# below. A value of 0 indicates the files should not be periodically
+# processed.
+
+#host_perfdata_file_processing_interval=0
+#service_perfdata_file_processing_interval=0
+
+
+
+# HOST AND SERVICE PERFORMANCE DATA FILE PROCESSING COMMANDS
+# These commands are used to periodically process the host and
+# service performance data files. The interval at which the
+# processing occurs is determined by the options above.
+
+#host_perfdata_file_processing_command=process-host-perfdata-file
+#service_perfdata_file_processing_command=process-service-perfdata-file
+
+
+
+# OBSESS OVER SERVICE CHECKS OPTION
+# This determines whether or not Nagios will obsess over service
+# checks and run the ocsp_command defined below. Unless you're
+# planning on implementing distributed monitoring, do not enable
+# this option. Read the HTML docs for more information on
+# implementing distributed monitoring.
+# Values: 1 = obsess over services, 0 = do not obsess (default)
+
+obsess_over_services=0
+
+
+
+# OBSESSIVE COMPULSIVE SERVICE PROCESSOR COMMAND
+# This is the command that is run for every service check that is
+# processed by Nagios. This command is executed only if the
+# obsess_over_service option (above) is set to 1. The command
+# argument is the short name of a command definition that you
+# define in your host configuration file. Read the HTML docs for
+# more information on implementing distributed monitoring.
+
+#ocsp_command=somecommand
+
+
+
+# ORPHANED SERVICE CHECK OPTION
+# This determines whether or not Nagios will periodically
+# check for orphaned services. Since service checks are not
+# rescheduled until the results of their previous execution
+# instance are processed, there exists a possibility that some
+# checks may never get rescheduled. This seems to be a rare
+# problem and should not happen under normal circumstances.
+# If you have problems with service checks never getting
+# rescheduled, you might want to try enabling this option.
+# Values: 1 = enable checks, 0 = disable checks
+
+check_for_orphaned_services=1
+
+
+
+# SERVICE FRESHNESS CHECK OPTION
+# This option determines whether or not Nagios will periodically
+# check the "freshness" of service results. Enabling this option
+# is useful for ensuring passive checks are received in a timely
+# manner.
+# Values: 1 = enable freshness checking, 0 = disable freshness checking
+
+check_service_freshness=1
+
+
+
+# SERVICE FRESHNESS CHECK INTERVAL
+# This setting determines how often (in seconds) Nagios will
+# check the "freshness" of service check results. If you have
+# disabled service freshness checking, this option has no effect.
+
+service_freshness_check_interval=60
+
+
+
+# HOST FRESHNESS CHECK OPTION
+# This option determines whether or not Nagios will periodically
+# check the "freshness" of host results. Enabling this option
+# is useful for ensuring passive checks are received in a timely
+# manner.
+# Values: 1 = enable freshness checking, 0 = disable freshness checking
+
+check_host_freshness=0
+
+
+
+# HOST FRESHNESS CHECK INTERVAL
+# This setting determines how often (in seconds) Nagios will
+# check the "freshness" of host check results. If you have
+# disabled host freshness checking, this option has no effect.
+
+host_freshness_check_interval=60
+
+
+
+# AGGREGATED STATUS UPDATES
+# This option determines whether or not Nagios will
+# aggregate updates of host, service, and program status
+# data. Normally, status data is updated immediately when
+# a change occurs. This can result in high CPU loads if
+# you are monitoring a lot of services. If you want Nagios
+# to only refresh status data every few seconds, enable
+# this option.
+# Values: 1 = enable aggregate updates, 0 = disable aggregate updates
+
+aggregate_status_updates=1
+
+
+
+# AGGREGATED STATUS UPDATE INTERVAL
+# Combined with the aggregate_status_updates option,
+# this option determines the frequency (in seconds!) that
+# Nagios will periodically dump program, host, and
+# service status data. If you are not using aggregated
+# status data updates, this option has no effect.
+
+status_update_interval=15
+
+
+
+# FLAP DETECTION OPTION
+# This option determines whether or not Nagios will try
+# and detect hosts and services that are "flapping".
+# Flapping occurs when a host or service changes between
+# states too frequently. When Nagios detects that a
+# host or service is flapping, it will temporarily suppress
+# notifications for that host/service until it stops
+# flapping. Flap detection is very experimental, so read
+# the HTML documentation before enabling this feature!
+# Values: 1 = enable flap detection
+# 0 = disable flap detection (default)
+
+enable_flap_detection=0
+
+
+
+# FLAP DETECTION THRESHOLDS FOR HOSTS AND SERVICES
+# Read the HTML documentation on flap detection for
+# an explanation of what this option does. This option
+# has no effect if flap detection is disabled.
+
+low_service_flap_threshold=5.0
+high_service_flap_threshold=20.0
+low_host_flap_threshold=5.0
+high_host_flap_threshold=20.0
+
+
+
+# DATE FORMAT OPTION
+# This option determines how short dates are displayed. Valid options
+# include:
+# us (MM-DD-YYYY HH:MM:SS)
+# euro (DD-MM-YYYY HH:MM:SS)
+# iso8601 (YYYY-MM-DD HH:MM:SS)
+# strict-iso8601 (YYYY-MM-DDTHH:MM:SS)
+#
+
+date_format=iso8601
+
+
+
+# P1.PL FILE LOCATION
+# This value determines where the p1.pl perl script (used by the
+# embedded Perl interpreter) is located. If you didn't compile
+# Nagios with embedded Perl support, this option has no effect.
+
+p1_file=/usr/local/nagios/bin/p1.pl
+
+
+
+# ILLEGAL OBJECT NAME CHARACTERS
+# This option allows you to specify illegal characters that cannot
+# be used in host names, service descriptions, or names of other
+# object types.
+
+illegal_object_name_chars=`~!$%^&*|'"<>?,()=
+
+
+
+# ILLEGAL MACRO OUTPUT CHARACTERS
+# This option allows you to specify illegal characters that are
+# stripped from macros before being used in notifications, event
+# handlers, etc. This DOES NOT affect macros used in service or
+# host check commands.
+# The following macros are stripped of the characters you specify:
+# $HOSTOUTPUT$
+# $HOSTPERFDATA$
+# $HOSTACKAUTHOR$
+# $HOSTACKCOMMENT$
+# $SERVICEOUTPUT$
+# $SERVICEPERFDATA$
+# $SERVICEACKAUTHOR$
+# $SERVICEACKCOMMENT$
+
+illegal_macro_output_chars=`~$&|'"<>
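+
+# For illustration only (a typical sample-config notification command; this
+# command is not necessarily defined in this setup): the characters listed
+# above are stripped from macros such as $SERVICEOUTPUT$ before a
+# notification command like the following is executed:
+#
+# define command{
+#        command_name    notify-by-email
+#        command_line    /usr/bin/printf "%b" "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\n" | /bin/mail -s "$NOTIFICATIONTYPE$ alert" $CONTACTEMAIL$
+#        }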
+
+
+
+# REGULAR EXPRESSION MATCHING
+# This option controls whether or not regular expression matching
+# takes place in the object config files. Regular expression
+# matching is used to match host, hostgroup, service, and service
+# group names/descriptions in some fields of various object types.
+# Values: 1 = enable regexp matching, 0 = disable regexp matching
+
+use_regexp_matching=0
+
+
+
+# "TRUE" REGULAR EXPRESSION MATCHING
+# This option controls whether or not "true" regular expression
+# matching takes place in the object config files. This option
+# only has an effect if regular expression matching is enabled
+# (see above). If this option is DISABLED, regular expression
+# matching only occurs if a string contains wildcard characters
+# (* and ?). If the option is ENABLED, regexp matching occurs
+# all the time (which can be annoying).
+# Values: 1 = enable true matching, 0 = disable true matching
+
+use_true_regexp_matching=0
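+
+# For illustration only (the grouping itself is hypothetical and requires
+# use_regexp_matching=1 above): with regexp matching enabled, a host_name
+# containing a wildcard character is treated as a regular expression, so a
+# single service definition can apply to several hosts at once, e.g.
+#
+# define service{
+#        use                     ssh-service-lxg
+#        host_name               lxg04.*
+#        service_description     SSH
+#        check_command           check_ssh!2
+#        }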
+
+
+
+
+# ADMINISTRATOR EMAIL ADDRESS
+# The email address of the administrator of *this* machine (the one
+# doing the monitoring). Nagios never uses this value itself, but
+# you can access this value by using the $ADMINEMAIL$ macro in your
+# notification commands.
+
+admin_email=s.yurevich@gsi.de
+
+
+# ADMINISTRATOR PAGER NUMBER/ADDRESS
+# The pager number/address for the administrator of *this* machine.
+# Nagios never uses this value itself, but you can access this
+# value by using the $ADMINPAGER$ macro in your notification
+# commands.
+
+admin_pager=pagehadaq
+
+
+
+# DAEMON CORE DUMP OPTION
+# This option determines whether or not Nagios is allowed to create
+# a core dump when it runs as a daemon. Note that it is generally
+# considered bad form to allow this, but it may be useful for
+# debugging purposes.
+# Values: 1 - Allow core dumps
+# 0 - Do not allow core dumps (default)
+
+daemon_dumps_core=0
+
+
+
--- /dev/null
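+# NOTE: servicegroup "members" are comma-separated <host_name>,<service_description>
+# pairs, i.e. host1,service1,host2,service2,...
+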
+# SOUND SERVER service group
+define servicegroup{
+ servicegroup_name soundserver-group
+ alias SOUND SERVER
+ members hadesdaq,SOUND_SERVER,lxhadesdaq,SOUND_SERVER,hadc08,SOUND_SERVER
+ }
+
+# HARD DISK TEST service group
+define servicegroup{
+ servicegroup_name harddisk-group
+ alias DISK TEST
+ members hadesdaq,DISK TEST,hadesdaq,RAID1,hadeb07,DISK_AB TEST,hadeb07,DISK_CD TEST,hades17,DISK TEST,hades17,RAID1,hades25,DISK TEST,hades25,RAID1,hades27,DISK TEST,hades27,RAID1;
+ }
+
+# online QA/DST service group
+#define servicegroup{
+# servicegroup_name onlinedst-group
+# alias online QA/DST
+# members lxg0411,updateQA,lxg0411,updateDST,lxg0430,runPairDST,lxg0440,runPairDST,lxg0441,runPairDST,lxg0442,runPairDST,lxg0443,runPairDST,lxg0444,runPairDST,lxg0451,runQA,lxg0452,runPairDST;
+# }
\ No newline at end of file
--- /dev/null
+# Generic service definition template - This is NOT a real service, just a template!
+
+define service{
+ name generic-service ; The 'name' of this service template
+ active_checks_enabled 1 ; Active service checks are enabled
+ passive_checks_enabled 1 ; Passive service checks are enabled/accepted
+ parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
+ obsess_over_service 1 ; We should obsess over this service (if necessary)
+ check_freshness 0 ; Default is to NOT check service 'freshness'
+ notifications_enabled 1 ; Service notifications are enabled
+ event_handler_enabled 1 ; Service event handler is enabled
+ flap_detection_enabled 1 ; Flap detection is enabled
+ failure_prediction_enabled 1 ; Failure prediction is enabled
+ process_perf_data 1 ; Process performance data
+ retain_status_information 1 ; Retain status information across program restarts
+ retain_nonstatus_information 1 ; Retain non-status information across program restarts
+ is_volatile 0 ; The service is not volatile
+ register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
+ }
+
+# PING-SERVICE
+define service{
+ use generic-service
+ name ping-service
+ hostgroups *
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 5
+ retry_check_interval 1
+ contact_groups linux-admins ; Make sure that the value here is also located in the contactgroup.cfg
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+
+ register 0
+ }
+
+# SSH-SERVICE
+define service{
+ use generic-service
+ name ssh-service
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ hostgroups *
+
+ register 0
+ }
+
+# PING-SERVICE for lxg hosts
+define service{
+ use generic-service
+ name ping-service-lxg
+ hostgroups *
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins ; Make sure that the value here is also located in the contactgroup.cfg
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+
+ register 0
+ }
+
+# SSH-SERVICE for lxg hosts
+define service{
+ use generic-service
+ name ssh-service-lxg
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ hostgroups *
+
+ register 0
+ }
+
+# PING
+define service{
+ use ping-service ; Name of service template to use
+ hostgroup_name vmecpu-group,hadeb-group
+# host_name *
+ service_description PING
+ check_command check_ping!100.0,20%!500.0,60%
+ }
+
+
+# SSH
+define service{
+ use ssh-service
+# host_name *
+ hostgroup_name vmecpu-group,hadeb-group
+ service_description SSH
+ check_command check_ssh!2
+}
+
+############# COMMON SERVICES FOR LXG04**
+# PING
+define service{
+ use ping-service-lxg ; Name of service template to use
+ hostgroup_name lxg-group,hades-group
+# host_name *
+ service_description PING
+ check_command check_ping!100.0,20%!500.0,60%
+ }
+
+
+# SSH
+define service{
+ use ssh-service-lxg
+# host_name *
+ hostgroup_name lxg-group,hades-group
+ service_description SSH
+ check_command check_ssh!2
+}
+
+####################### hadesdaq ##########################
+# local raid
+define service{
+ use generic-service
+ host_name hadesdaq
+ service_description RAID1
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 30
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_raid
+ }
+
+# local check load
+define service{
+ use generic-service
+ host_name hadesdaq
+ service_description CPU LOAD
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 5
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_load!10!10!0.9!20!20!1.5!
+ }
+
+# local disk test
+define service{
+ use generic-service
+ host_name hadesdaq
+ service_description DISK TEST
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 480
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 480
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_smartctl_temp!/dev/sda!/dev/sdb!50!60!
+ }
+
+# check process: sound_server.pl
+define service{
+ use generic-service
+ host_name hadesdaq
+ service_description SOUND_SERVER
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 5
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 480
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc2!sound_server.pl!
+ }
+
+################### lxhadesdaq ########################
+#check disk space
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description /DATA
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 30
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/data!
+ }
+
+#check disk space
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description /VAR
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!8%!4%!/var!
+ }
+
+# remote cpu load
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description CPU LOAD
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 5
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_load_by_ssh!10!10!2.5!20!20!3.5!
+ }
+
+#check archivist
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description ARCHIVIST
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 5
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_archivist!1978!
+ }
+
+#check process: runinfo2ora.pl
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description RUNINFO2ORA
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc2_by_ssh!runinfo2ora.pl!
+ }
+
+#check process: sound_server.pl
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description SOUND_SERVER
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc2_by_ssh!sound_server.pl!
+ }
+
+#check process: dhcp service
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description DHCP
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 120
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_dhcp!140.181.75.158!
+ }
+
+#check EB log file for discarded events
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description DISCARDED EVTS
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_eblog!50501!
+ }
+
+#check lustre status (via check_proc_status/check_lustre)
+define service{
+ use generic-service
+ host_name lxhadesdaq
+ service_description LUSTRE
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 2
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_status!50502!check_lustre!
+ }
+
+####################### hadeb01 ###########################
+# remote disk check
+#define service{
+# use generic-service
+# host_name hadeb01
+# service_description /VAR
+# is_volatile 0
+# check_period 24x7
+# max_check_attempts 3
+# normal_check_interval 60
+# retry_check_interval 1
+# contact_groups linux-admins
+# notification_interval 120
+# notification_period 24x7
+# notification_options c,r
+# check_command check_disk_by_ssh!20%!10%!/var!
+# }
+
+####################### hadeb03 ###########################
+# remote disk check
+define service{
+ use generic-service
+ host_name hadeb03
+ service_description /D/HADEB03
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/d/hadeb03!
+ }
+
+# remote disk check
+define service{
+ use generic-service
+ host_name hadeb03
+ service_description /D/HADEB03B
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/d/hadeb03b!
+ }
+
+# remote raid check
+define service{
+ use generic-service
+ host_name hadeb03
+ service_description RAID1
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_raid_by_ssh
+ }
+
+####################### hadeb04 ###########################
+# remote disk check
+define service{
+ use generic-service
+ host_name hadeb04
+ service_description /DATA/HADEB04
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/data/hadeb04!
+ }
+
+####################### hadeb05 ###########################
+# remote disk check
+define service{
+ use generic-service
+ host_name hadeb05
+ service_description /
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/!
+ }
+
+# remote cpu load
+define service{
+ use generic-service
+ host_name hadeb05
+ service_description CPU LOAD
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 5
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_load_by_ssh!10!10!1.0!20!20!1.5!
+ }
+
+####################### hadeb06 ###########################
+# remote disk check
+define service{
+ use generic-service
+ host_name hadeb06a
+ service_description /DATA/HADEB06
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/data/hadeb06!
+ }
+
+#define service{
+# use generic-service
+# host_name hadeb06a
+# service_description CONNECT_RES_RAM
+# is_volatile 0
+# check_period 24x7
+# max_check_attempts 3
+# normal_check_interval 10
+# retry_check_interval 1
+# contact_groups linux-admins
+# notification_interval 120
+# notification_period 24x7
+# notification_options c,r
+# check_command check_proc2_by_ssh!connect_res_ram!
+# }
+
+#define service{
+# use generic-service
+# host_name hadeb06a
+# service_description GET_HLD_RAMDISK
+# is_volatile 0
+# check_period 24x7
+# max_check_attempts 3
+# normal_check_interval 10
+# retry_check_interval 1
+# contact_groups linux-admins
+# notification_interval 120
+# notification_period 24x7
+# notification_options c,r
+# check_command check_proc2_by_ssh!get_hld_ramdisk!
+# }
+
+# remote process (connect_res) check status
+define service{
+ use generic-service
+ host_name hadeb06a
+ service_description CONNECT_RES_RAM
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_status!50501!connect_res_ram!
+ }
+
+# remote process (get_hld_ramdisk) check status
+define service{
+ use generic-service
+ host_name hadeb06a
+ service_description GET_HLD_RAMDISK
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_status!50502!get_hld_ramdisk!
+ }
+
+####################### lxg0434 ###########################
+# remote process (check_archiver) check status
+define service{
+ use generic-service
+ host_name lxg0434
+ service_description ARCHIVER
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 5
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_status!50501!check_archiver!
+ }
+
+####################### lxg0447 ###########################
+# remote disk check
+define service{
+ use generic-service
+ host_name lxg0447
+ service_description /DATA.LOCAL2
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh_lxg0447!30%!20%!/data.local2!
+ }
+
+# remote process (connect_res) check
+define service{
+ use generic-service
+ host_name lxg0447
+ service_description CONNECT_RES
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_by_ssh!connect_res!
+ }
+
+# remote process (connect_res) check status
+define service{
+ use generic-service
+ host_name lxg0447
+ service_description CONNECT_RES STATUS
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_status!50501!connect_res!
+ }
+
+####################### lxg0451 ###########################
+# remote disk check
+define service{
+ use generic-service
+ host_name lxg0451
+ service_description /DATA.LOCAL2
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh_lxg0451!15%!10%!/data.local2!
+ }
+
+# remote process (connect_res) check
+define service{
+ use generic-service
+ host_name lxg0451
+ service_description CONNECT_RES
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 60
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_by_ssh!connect_res!
+ }
+
+# remote process (connect_res) check status
+define service{
+ use generic-service
+ host_name lxg0451
+ service_description CONNECT_RES STATUS
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_status!50501!connect_res!
+ }
+
+####################### hadeb07 ###########################
+# remote disk check
+define service{
+ use generic-service
+ host_name hadeb07
+ service_description DISK_AB TEST
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_smartctl_temp_by_ssh!/dev/sda!/dev/sdb!50!60!
+ }
+
+define service{
+ use generic-service
+ host_name hadeb07
+ service_description DISK_CD TEST
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_smartctl_temp_by_ssh!/dev/sdc!/dev/sdd!50!60!
+ }
+
+define service{
+ use generic-service
+ host_name hadeb07
+ service_description BACKUP
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc_status!50501!check_backup!
+ }
+
+###################### hadc08 #############################
+#check process: sound_server.pl
+define service{
+ use generic-service
+ host_name hadc08
+ service_description SOUND_SERVER
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 10
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 120
+ notification_period 24x7
+ notification_options c,r
+ check_command check_proc2_by_ssh!sound_server.pl!
+ }
+
+###################### hades25 ############################
+## local disk test
+define service{
+ use generic-service
+ host_name hades25
+ service_description DISK TEST
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_smartctl_temp_by_ssh!/dev/sda!/dev/sdb!50!60!
+ }
+
+# remote raid check
+define service{
+ use generic-service
+ host_name hades25
+ service_description RAID1
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_raid_by_ssh
+ }
+
+#check disk space
+define service{
+ use generic-service
+ host_name hades25
+ service_description /
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/!
+ }
+
+###################### hades17 ############################
+## local disk test
+define service{
+ use generic-service
+ host_name hades17
+ service_description DISK TEST
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_smartctl_temp_by_ssh!/dev/sda!/dev/sdb!60!70!
+ }
+
+# remote raid check
+define service{
+ use generic-service
+ host_name hades17
+ service_description RAID1
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_raid_by_ssh
+ }
+
+#check disk space
+define service{
+ use generic-service
+ host_name hades17
+ service_description /
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/!
+ }
+
+###################### hades27 ############################
+## remote disk test
+define service{
+ use generic-service
+ host_name hades27
+ service_description DISK TEST
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_smartctl_temp_by_ssh!/dev/sda!/dev/sdb!60!70!
+ }
+
+# remote raid check
+define service{
+ use generic-service
+ host_name hades27
+ service_description RAID1
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_raid_by_ssh
+ }
+
+#check disk space
+define service{
+ use generic-service
+ host_name hades27
+ service_description /
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_disk_by_ssh!20%!10%!/!
+ }
+
+#check EPICS
+define service{
+ use generic-service
+ host_name hadsc1
+ service_description EPICS test
+ is_volatile 0
+ check_period 24x7
+ max_check_attempts 3
+ normal_check_interval 1440
+ retry_check_interval 1
+ contact_groups linux-admins
+ notification_interval 1440
+ notification_period 24x7
+ notification_options c,r
+ check_command check_epics!HAD:hadsc1:scan1!
+ }
--- /dev/null
+#!/usr/bin/perl -w
+# ---------------------------------------------------------------------------
+# File Name: my_check_archivist.pl
+# Author: Sergey Yurevich
+# Date: 16/01/2007
+# Version: 0.1
+# Description: script will check to see if there
+# is a message from archivist
+# ---------------------------------------------------------------------------
+
+use strict;
+use warnings;
+use IO::Socket;
+use lib '/usr/local/nagios/libexec/';
+use utils qw($TIMEOUT %ERRORS &print_revision &support);
+
+@ARGV == 2 or die "usage: my_check_archivist.pl host_ip host_port\n";
+
+my ($remote_host, $remote_port) = @ARGV;
+
+#my $remote_host = 'lxhadesdaq.gsi.de';
+#my $remote_port = '60006';
+my $protocol = 'tcp';
+my $state;
+my $answer = "";
+
+my $socket = IO::Socket::INET->new(PeerAddr => $remote_host,
+ PeerPort => $remote_port,
+ Proto => $protocol,
+ Type => SOCK_STREAM)
+ or $answer = "CRITICAL - no response from archivist at $remote_host:$remote_port";
+
+if($answer){
+ $state = $ERRORS{'CRITICAL'};
+}
+else{
+ $answer = <$socket>;
+
+ close($socket);
+
+ if($answer =~/OK/){
+ $state = $ERRORS{'OK'};
+ }
+ elsif($answer =~/WARNING/){
+ $state = $ERRORS{'WARNING'};
+ }
+ elsif($answer =~/CRITICAL/){
+ $state = $ERRORS{'CRITICAL'};
+ }
+ else{
+ $state = $ERRORS{'UNKNOWN'};
+ }
+}
+
+if($state == $ERRORS{'OK'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'WARNING'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'CRITICAL'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'UNKNOWN'}){
+ print "UNKNOWN - $answer\n";
+}
+
+exit $state;
+
+
+
+
--- /dev/null
+#!/usr/bin/perl -w
+# ------------------------------------------------------------------------------
+# File Name: check_raid.pl
+# Author: Thomas Nilsen - Norway
+# Date: 14/06/2003
+# Version: 0.1
+# Description: This script will check to see if any software raid
+# devices are down.
+# Email: thomas.nilsen@doc-s.co.uk
+# WWW: www.doc-s.co.uk
+# ------------------------------------------------------------------------------
+# Copyright 2003 (c) Thomas Nilsen
+# Credits go to Ethan Galstad for coding Nagios
+# License GPL
+# ------------------------------------------------------------------------------
+# Date Author Reason
+# ---- ------ ------
+# 14/06/2003 TN Initial Release
+# - Format of mdstat assumed to be "2 line" per
+# device with [??] on the second line.
+# ------------------------------------------------------------------------------
+
+use strict;
+use warnings;
+use Getopt::Long;
+use vars qw($opt_V $opt_h $opt_t $opt_F $PROGNAME);
+use lib '/usr/local/nagios/libexec/';
+use utils qw($TIMEOUT %ERRORS &print_revision &support);
+
+$PROGNAME="check_raid";
+
+$ENV{'PATH'}='';
+$ENV{'BASH_ENV'}='';
+$ENV{'ENV'}='';
+my ( $line, $prevline, $stat, $state ,@device, $msg, $status, $timeout);
+
+$stat="/proc/mdstat";
+
+#Option checking
+Getopt::Long::Configure('bundling');
+$status = GetOptions(
+ "V" => \$opt_V, "version" => \$opt_V,
+ "h" => \$opt_h, "help" => \$opt_h,
+ "F" => \$opt_F, "filename" => \$opt_F,
+ "t" => \$opt_t, "timeout" => \$opt_t);
+# Version
+if ($opt_V) {
+ print_revision($PROGNAME,'$Revision: 1.1 $');
+ exit $ERRORS{'OK'};
+}
+# Help
+if ($opt_h) {
+ #print_help();
+ exit $ERRORS{'OK'};
+}
+# Filename supplied
+if ($opt_F) {
+ $opt_F = shift;
+ $stat = $1 if ($opt_F =~ /^(.*)$/);
+
+ if ( ! -r $stat ) {
+ print "Invalid mdstat file: $opt_F\n";
+ exit $ERRORS{'UNKNOWN'};
+ }
+}
+
+$timeout = $TIMEOUT;
+($opt_t) && ($opt_t =~ /^([0-9]+)$/) && ($timeout = $1);
+
+# Just in case of problems, let's not hang Nagios
+$SIG{'ALRM'} = sub {
+ print ("ERROR: No response (alarm)\n");
+ exit $ERRORS{'UNKNOWN'};
+};
+alarm($timeout);
+
+# Start checking the file...
+open (FH, $stat) or do {
+ print "UNKNOWN - cannot read $stat\n";
+ exit $ERRORS{'UNKNOWN'};
+};
+$state = $ERRORS{'OK'};
+$msg ="";
+
+# Now check the mdstat file..
+while (<FH>) {
+ $line= $_;
+ if( $line =~ / \[_|_\]|U_|_U /) {
+ $state = $ERRORS{'CRITICAL'};
+ @device = split(/ /,$prevline);
+ $msg = $msg . $device[0] . ": - ";
+ }
+ $prevline = $line;
+}
+close (FH);
+
+if ( $state == $ERRORS{'CRITICAL'} ) {
+ print "CRITICAL - Device(s) $msg have failed\n";
+} elsif ( $state == $ERRORS{'OK'} )
+ { print "OK - All devices are online\n"; }
+exit $state;
+
+
--- /dev/null
+#!/usr/bin/perl -w
+# ----------------------------------------------------------------------------
+# File Name: my_check_dhcp.pl
+# Author: Sergey Yurevich
+# Date: 05/04/2007
+# Version: 0.1
+# Description: script will check if the dhcp service is answering
+# ----------------------------------------------------------------------------
+
+use strict;
+use warnings;
+use lib '/usr/local/nagios/libexec/';
+use utils qw($TIMEOUT %ERRORS &print_revision &support);
+
+@ARGV == 1 or die "usage: my_check_dhcp.pl ip\n";
+
+my ($ip) = @ARGV;
+
+#dhcping - check dhcp service
+#-c 140.181.67.143 - from hadesdaq (140.181.67.143)
+#-s 140.181.75.158 - on lxhadesdaq (140.181.75.158)
+#-h 00:40:9E:00:99:E2 - if one gets an answer from MAC address (00:40:9E:00:99:E2)
+
+my $answer = `dhcping -c 140.181.67.143 -s $ip -h 00:40:9E:00:99:E2`;
+chop($answer);
+
+my $state;
+
+if($answer =~/Got answer from/){
+ $state = $ERRORS{'OK'};
+}
+else{
+ $state = $ERRORS{'CRITICAL'};
+}
+
+if($state == $ERRORS{'OK'}){
+ print "OK - dhcp is running\n";
+}
+elsif($state == $ERRORS{'CRITICAL'}){
+ print "CRITICAL - dhcp is not running!\n";
+}
+
+exit $state;
+
+
+
+
--- /dev/null
+#! /usr/bin/perl -w
+# ----------------------------------------------------------------------------
+# File Name: my_check_disk_smartctl.pl
+# Author: Sergey Yurevich
+# Date: 16/01/2007
+# Version: 0.1
+# Description: script will perform SMART overall-health
+# self-assessment test + temperature check.
+# ----------------------------------------------------------------------------
+
+use strict;
+use Data::Dumper;
+use lib '/usr/local/nagios/libexec/' ;
+use utils qw($TIMEOUT %ERRORS &print_revision &support &usage);
+use Getopt::Long;
+
+Getopt::Long::Configure( 'bundling' );
+
+my (@devices, $temper, $warntemp, $crittemp);
+
+GetOptions( "d|devices=s" => \@devices,
+ "t|temper" => \$temper,
+ "w|warning=i" => \$warntemp,
+ "c|critical=i" => \$crittemp );
+
+my $SMARTCTL = "/usr/sbin/smartctl";
+my $state;
+
+my $stateCrit = -1;
+my $stateWarn = -1;
+my $stateUnkn = -1;
+
+die "usage: my_check_disk_smartctl.pl [--temper -w 50 -c 60] -d /dev/sda -d /dev/sdb ...\n" unless @devices;
+
+#- loop over disks
+foreach my $disk (@devices)
+{
+ $state = -1;
+
+ #- valid devices: /dev/hda,..., /dev/sda,...
+ unless ($disk =~ /(\/dev\/[hs]d[0-9a-z]+)/){
+ print "ERROR: Invalid disk: $disk\n";
+ exit $ERRORS{'UNKNOWN'};
+ }
+
+ if( $temper )
+ {
+ my $temperature = &check_temperature( $disk );
+
+ if( $temperature > $crittemp )
+ {
+      print "CRITICAL! $disk: temperature is $temperature ";
+ $stateCrit = $ERRORS{'CRITICAL'};
+ }
+ elsif( $temperature > $warntemp )
+ {
+ print "WARNING! $disk: temperature is $temperature ";
+ $stateWarn = $ERRORS{'WARNING'};
+ }
+ elsif( $temperature eq "" )
+ {
+      print "CRITICAL! No output from smartctl -A $disk ";
+ $stateCrit = $ERRORS{'CRITICAL'};
+ }
+ elsif( $temperature == -1 )
+ {
+      print "CRITICAL! Temperature check failed! ";
+ $stateCrit = $ERRORS{'CRITICAL'};
+ }
+ else
+ {
+ print "OK! $disk: temperature = $temperature ";
+ }
+ }
+
+ my $command = "$SMARTCTL -H $disk";
+ my $status = `$command`;
+
+ #$status = "hgftrefsd FAIL";
+
+ if ($status eq "") {
+ print "ERROR: no output from '$command'\n";
+ $state = $ERRORS{'CRITICAL'};
+ }
+
+  #- $ok is set to the matching line of smartctl output if PASSED...
+  my $ok;
+  $ok = $1 if $status =~ /\n(.*?test result: PASSED\n)/i or
+              $status =~ /\n(.*?Sense: Ok!\n)/i;
+ if ($ok){
+ #print "$disk: $ok";
+ print "OK! $disk: SMART health test: PASSED ";
+ $state = $ERRORS{'OK'};
+ }
+
+  #- $fail is set to the matching line of smartctl output if FAIL...
+  my $fail;
+  $fail = $1 if $status =~ /\n(.*?[^WHEN_]FAIL[^ED][^\n]*)/i;
+ if ($fail){
+ print "CRITICAL! $disk: $fail";
+ $state = $ERRORS{'CRITICAL'};
+ }
+
+  #- $old is set to the matching line of smartctl output if OLD...
+  my $old;
+  $old = $1 if $status =~ /\n(.*?OLD[^_age][^\n]*)/i;
+ if ($old){
+ print "WARNING! $disk: $old\n";
+ $state = $ERRORS{'WARNING'};
+ }
+
+ if($state == $ERRORS{'CRITICAL'}){
+ $stateCrit = $ERRORS{'CRITICAL'};
+ }
+ elsif($state == $ERRORS{'WARNING'}){
+ $stateWarn = $ERRORS{'WARNING'};
+ }
+ elsif($state == -1){
+ print "UNKNOWN! Check manually: $SMARTCTL -H $disk ";
+ $stateUnkn = $ERRORS{'UNKNOWN'};
+ }
+}
+
+if($stateCrit == $ERRORS{'CRITICAL'}){
+ exit $stateCrit;
+}
+elsif($stateUnkn == $ERRORS{'UNKNOWN'}){
+ exit $stateUnkn;
+}
+elsif($stateWarn == $ERRORS{'WARNING'}){
+ exit $stateWarn;
+}
+else{
+ exit $ERRORS{'OK'};
+}
+
+sub check_temperature
+{
+ my ($disk) = @_;
+
+ my $command = "$SMARTCTL -A $disk";
+ my $temperature = -1;
+
+ my @status = `$command`;
+
+ #print Dumper @status;
+
+ foreach my $line ( @status )
+ {
+ chop( $line );
+ if( $line =~ "194 Temperature_Celsius" )
+ {
+ my @words = split(/ +/, $line);
+ $temperature = $words[9];
+ }
+ }
+
+ return $temperature;
+}
+
+
--- /dev/null
+#!/usr/bin/perl -w
+
+########################################################
+#
+# Author: S.Y.
+#
+# This script checks Event Builder log file and
+# estimates the number of files with discarded events
+# above a given threshold
+#
+########################################################
+
+use strict;
+use Data::Dumper;
+use Tie::File;
+use Fcntl;
+use IO::Handle;
+
+my $i;
+my @lines;
+my $line;
+
+my $file2read = sprintf("%s_s.tcl", $ENV{DAQ_SETUP});
+
+tie(@lines, 'Tie::File', $file2read, mode => O_RDONLY)
+ or die "Cannot tie file $file2read: $!\n";
+
+
+#- the file info is searched only for files created during:
+my $last_minutes = 60; #last 60 minutes
+
+#- get current time in iso format
+my ($y, $m, $d, $hh, $mm, $ss) = (localtime)[5,4,3,2,1,0]; $y += 1900; $m++;
+my $iso_now = sprintf("%d-%02d-%02d %02d:%02d:%02d", $y, $m, $d, $hh, $mm, $ss);
+
+#- init counters
+my $filenum = 0;
+my $errfilenum1 = 0; #file with many evtsDiscarded
+my $errfilenum2 = 0; #file with many evtsDataError
+my $errfilenum3 = 0; #file with many evtsTagError
+
+my ($evtsComplete, $evtsDiscarded, $evtsDataError, $evtsTagError);
+
+#- status info for Nagios (default used if nothing recent can be parsed)
+my $status = "UNKNOWN - could not determine status from $file2read";
+
+#--- loop over all lines backward in the file2read
+for ( $i = $#lines; $i > 1; $i--){
+
+ $line = $lines[$i];
+
+ #- look for a line with "stopdate"
+ if ( $line =~ /stopdate/){
+
+ #- check the number of problematic events in file
+ if ($filenum > 0 && $evtsComplete > 0) {
+
+ #- estimate amount of discarded events
+ my $ratio1 = $evtsDiscarded/$evtsComplete;
+ my $ratio2 = $evtsDataError/$evtsComplete;
+ my $ratio3 = $evtsTagError/$evtsComplete;
+
+ #print "evtsComplete = $evtsComplete, evtsDiscarded = $evtsDiscarded, ratio1 = $ratio1\n";
+
+ if ($ratio1 > 0.1) {
+ $errfilenum1++;
+ }
+ if ($ratio2 > 0.1) {
+ $errfilenum2++;
+ }
+ if ($ratio3 > 0.1) {
+ $errfilenum3++;
+ }
+ }
+
+ #- extract stop date from the line (format: "2007-05-05T19:32:53")
+ my ($v1, $v2, $stop_date) = split(" ", $line);
+
+ #- get rid of ""
+ $stop_date =~ s/\"//g;
+
+ #-get rid of "T"
+ $stop_date =~ s/T/ /;
+
+ #- get time difference (in minutes)
+ my $time_diff = &timeDiff( date1 => $stop_date, date2 => $iso_now );
+
+ #print "stop_date = $stop_date, time_diff = $time_diff\n";
+
+ #- look for a recent hour
+ if ( $time_diff > $last_minutes) {
+
+ if ($filenum == 0) {
+ $status = "OK - no new files in a log during last $last_minutes min.";
+ last;
+ }
+ elsif ($filenum > 0) {
+	    my $frac1 = $errfilenum1/$filenum;
+	    my $frac2 = $errfilenum2/$filenum;
+	    my $frac3 = $errfilenum3/$filenum;
+
+	    # if the fraction of files with discarded events above threshold
+	    # exceeds 10% -> send a WARNING
+	    if ($frac1 > 0.1) {
+		$status = "WARNING - fraction $frac1 of files with discarded events during last $last_minutes min.";
+		last;
+	    }
+	    elsif ($frac2 > 0.1) {
+		$status = "WARNING - fraction $frac2 of files with data error during last $last_minutes min.";
+		last;
+	    }
+	    elsif ($frac3 > 0.1) {
+		$status = "WARNING - fraction $frac3 of files with tag error during last $last_minutes min.";
+		last;
+	    }
+	    else {
+		$status = "OK - fraction $frac1 of files with discarded events during last $last_minutes min.";
+ last;
+ }
+ }
+ } #if ( $time_diff > 60.)
+ else {
+
+ #- increment filenum counter
+ $filenum++;
+ }
+ } #if ( $line =~ /stopdate/){
+ else {
+
+ my ($v1, $v2);
+
+ if($line =~ /evtsComplete/) {
+ ($v1, $v2, $evtsComplete) = split(" ", $line);
+ }
+ if($line =~ /evtsDiscarded/) {
+ ($v1, $v2, $evtsDiscarded) = split(" ", $line);
+ }
+ if($line =~ /evtsDataError/) {
+ ($v1, $v2, $evtsDataError) = split(" ", $line);
+ }
+ if($line =~ /evtsTagError/) {
+ ($v1, $v2, $evtsTagError) = split(" ", $line);
+ }
+ }
+}
+
+print "status for Nagios: $status\n";
+
+sub timeDiff (%) {
+ #
+ # this subroutine calculates the time difference in minutes
+ # usage: $timeDiffStr = &timeDiff( date1 => $sale_time, date2 => $iso_now );
+ # time format: $sale_time = "2007-05-05 19:32:53";
+ #
+
+ my %args = @_;
+
+ my @offset_days = qw(0 31 59 90 120 151 181 212 243 273 304 334);
+
+ my $year1 = substr($args{'date1'}, 0, 4);
+ my $month1 = substr($args{'date1'}, 5, 2);
+ my $day1 = substr($args{'date1'}, 8, 2);
+ my $hh1 = substr($args{'date1'},11, 2) || 0;
+ my $mm1 = substr($args{'date1'},14, 2) || 0;
+ my $ss1 = substr($args{'date1'},17, 2) if (length($args{'date1'}) > 16);
+ $ss1 ||= 0;
+
+ my $year2 = substr($args{'date2'}, 0, 4);
+ my $month2 = substr($args{'date2'}, 5, 2);
+ my $day2 = substr($args{'date2'}, 8, 2);
+ my $hh2 = substr($args{'date2'},11, 2) || 0;
+ my $mm2 = substr($args{'date2'},14, 2) || 0;
+ my $ss2 = substr($args{'date2'},17, 2) if (length($args{'date2'}) > 16);
+ $ss2 ||= 0;
+
+ my $total_days1 = $offset_days[$month1 - 1] + $day1 + 365 * $year1;
+ my $total_days2 = $offset_days[$month2 - 1] + $day2 + 365 * $year2;
+ my $days_diff = $total_days2 - $total_days1;
+
+ my $seconds1 = $total_days1 * 86400 + $hh1 * 3600 + $mm1 * 60 + $ss1;
+ my $seconds2 = $total_days2 * 86400 + $hh2 * 3600 + $mm2 * 60 + $ss2;
+
+ my $ssDiff = $seconds2 - $seconds1;
+
+ my $dd = int($ssDiff / 86400);
+ my $hh = int($ssDiff / 3600) - $dd * 24;
+ my $mm = int($ssDiff / 60) - $dd * 1440 - $hh * 60;
+ my $ss = int($ssDiff / 1) - $dd * 86400 - $hh * 3600 - $mm * 60;
+
+ my $totminutes = int($ssDiff / 60);
+  return $totminutes;
+}
--- /dev/null
+#!/usr/bin/perl -w
+# ------------------------------------------------------------------------------
+# File Name: my_check_eblog_status.pl
+# Author: Sergey Yurevich
+# Date: 16/01/2007
+# Version: 0.1
+# Description: script checks the status of the process (alive/dead)
+# ------------------------------------------------------------------------------
+
+use strict;
+use warnings;
+use IO::Socket;
+use lib '/usr/local/nagios/libexec/';
+use utils qw($TIMEOUT %ERRORS &print_revision &support);
+
+@ARGV == 2 or die "usage: my_check_eblog_status.pl host_ip host_port\n";
+
+my ($remote_host, $remote_port) = @ARGV;
+
+#my $remote_host = 'lxhadesdaq.gsi.de';
+#my $remote_port = '60006';
+my $protocol = 'tcp';
+my $state;
+my $answer = "";
+
+my $socket = IO::Socket::INET->new(PeerAddr => $remote_host,
+ PeerPort => $remote_port,
+ Proto => $protocol,
+ Type => SOCK_STREAM)
+ #or die "Couldn't connect to $remote_host:$remote_port : $@\n";
+ or $answer = "WARNING - no response from my_check_eblog at $remote_host:$remote_port";
+
+if($answer){
+ $state = $ERRORS{'WARNING'};
+}
+else{
+ $answer = <$socket>;
+
+ close($socket);
+
+ if($answer =~/OK/){
+ #nagios exit code 0 = status OK = green
+ $state = $ERRORS{'OK'};
+ }
+ elsif($answer =~/WARNING/){
+ $state = $ERRORS{'WARNING'};
+ }
+ elsif($answer =~/CRITICAL/){
+ $state = $ERRORS{'CRITICAL'};
+ }
+ else{
+ #nagios exit code 3 = status UNKNOWN = orange
+ $state = $ERRORS{'UNKNOWN'};
+ }
+}
+
+if($state == $ERRORS{'OK'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'WARNING'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'CRITICAL'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'UNKNOWN'}){
+ print "UNKNOWN - $answer\n";
+}
+
+exit $state;
+
+
+
+
--- /dev/null
+#!/usr/bin/perl -w
+
+#BEGIN{
+
+# push @INC, "/usr/lib/perl5/site_perl/5.8.0/i586-linux-thread-multi";
+
+#}
+
+=head1 NAME
+
+check_ping.pl - pings a host and returns statistics data.
+
+=head1 VERSION
+
+Version 1.0
+
+=head1 AUTHOR
+
+(c) 2003 Hannes Schulz <mail@hannes-schulz.de>
+
+=head1 SYNOPSIS
+
+ ./check_ping.pl --host <host> --loss <warn>,<crit> --rta <warn>,<crit>
+ [--timeout <seconds>] [--packages <packages>]
+
+=head1 DESCRIPTION
+
+This pings a host via the C<Net::Ping> module from CPAN and returns
+RTA and loss.
+
+=cut
+
+use strict;
+
+use Getopt::Long;
+use Pod::Usage;
+use Net::Ping;
+
+my ($host,$aloss,$arta,$timeout,$pack);
+GetOptions(
+ "H|host=s", \$host,
+ "l|loss=s", \$aloss,
+ "r|rta=s", \$arta,
+ "t|timeout=i", \$timeout,
+ "p|packages=i",\$pack
+);
+
+pod2usage("$0: No host given!\n") unless($host);
+pod2usage("$0: Parameter syntax error!\n") unless($aloss =~ /^\d+,\d+$/o);
+pod2usage("$0: Parameter syntax error!\n") unless($arta =~ /^\d+,\d+$/o);
+
+my ($wloss,$closs) = split /,/,$aloss;
+my ($wrta,$crta) = split /,/,$arta;
+
+pod2usage("$0: Warning > Critical!\n") unless($wloss<$closs);
+pod2usage("$0: Warning > Critical!\n") unless($wrta<$crta);
+
+$pack ||= 5;
+$timeout ||= ($pack*3.5);
+
+my $p = Net::Ping->new("tcp",$timeout/$pack);
+$p->hires(1);
+
+my ($ret, $duration, $ip, $nok, $dur);
+$nok = 0; $dur = 0;
+for(1..$pack){
+ ($ret, $duration, $ip) = $p->ping($host);
+ $nok++ if(!$ret);
+ $dur += $duration;
+ $p->close();
+}
+
+my $rta = 1000 * $dur/$pack;
+my $loss = 100 * $nok/$pack;
+
+printf("PING - Packet loss = %i%%, RTA = %.2f ms\n", $loss, $rta);
+
+exit(2) if($rta>$crta or $loss>$closs); # Nagios: Critical
+exit(1) if($rta>$wrta or $loss>$wloss); # Nagios: Warning
+exit(0); # Nagios: OK
--- /dev/null
+#!/usr/bin/perl -w
+# ----------------------------------------------------------------------------
+# File Name: my_check_proc_status.pl
+# Author: Sergey Yurevich
+# Date: 16/01/2007
+# Version: 0.1
+# Description: script checks the status of the process (alive/dead)
+# ----------------------------------------------------------------------------
+
+use strict;
+use warnings;
+use IO::Socket;
+use lib '/usr/local/nagios/libexec/';
+use utils qw($TIMEOUT %ERRORS &print_revision &support);
+
+@ARGV == 3 or die "usage: my_check_proc_status.pl host_ip host_port proc_name\n";
+
+my ($remote_host, $remote_port, $proc_name) = @ARGV;
+
+#my $remote_host = 'lxhadesdaq.gsi.de';
+#my $remote_port = '60006';
+my $protocol = 'tcp';
+my $state;
+my $answer = "";
+
+my $socket = IO::Socket::INET->new(PeerAddr => $remote_host,
+ PeerPort => $remote_port,
+ Proto => $protocol,
+ Type => SOCK_STREAM)
+ or $answer = "CRITICAL - no response from $proc_name at $remote_host:$remote_port";
+
+if($answer){
+ $state = $ERRORS{'CRITICAL'};
+}
+else{
+ $answer = <$socket>;
+
+ close($socket);
+
+ if($answer =~/OK/){
+ $state = $ERRORS{'OK'};
+ }
+ elsif($answer =~/WARNING/){
+ $state = $ERRORS{'WARNING'};
+ }
+ elsif($answer =~/CRITICAL/){
+ $state = $ERRORS{'CRITICAL'};
+ }
+ else{
+ $state = $ERRORS{'UNKNOWN'};
+ }
+}
+
+if($state == $ERRORS{'OK'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'WARNING'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'CRITICAL'}){
+ print "$answer\n";
+}
+elsif($state == $ERRORS{'UNKNOWN'}){
+ print "UNKNOWN - $answer\n";
+}
+
+exit $state;
+
+
+
+
--- /dev/null
+#!/usr/bin/perl -w
+# ----------------------------------------------------------------------------
+# File Name: my_check_process.pl
+# Author: Sergey Yurevich
+# Date: 05/04/2007
+# Version: 0.1
+# Description: script will check if there is a process running
+# ----------------------------------------------------------------------------
+
+use strict;
+use warnings;
+use lib '/usr/local/nagios/libexec/';
+use utils qw($TIMEOUT %ERRORS &print_revision &support);
+
+@ARGV == 1 or die "usage: my_check_process.pl process_name\n";
+
+my ($process_name) = @ARGV;
+
+my $pids = `pidof -x $process_name`;
+chop($pids);
+
+my $state;
+
+if($pids){
+ $state = $ERRORS{'OK'};
+}
+else{
+ $state = $ERRORS{'CRITICAL'};
+}
+
+if($state == $ERRORS{'OK'}){
+ print "OK - pid of $process_name is $pids\n";
+}
+elsif($state == $ERRORS{'CRITICAL'}){
+ print "CRITICAL - $process_name is not running!\n";
+}
+
+exit $state;
+
+
+
+
--- /dev/null
+#!/usr/bin/perl -w
+# ----------------------------------------------------------------------------
+# File Name: my_check_process.pl
+# Author: Sergey Yurevich
+# Date: 05/04/2007
+# Version: 0.1
+# Description: script will check if there is a process running
+# ----------------------------------------------------------------------------
+
+use strict;
+use warnings;
+#use lib '/usr/local/nagios/libexec/';
+use lib '/misc/hadaq/nagios/nagios-plugins-1.4.5/plugins-scripts/';
+use utils qw($TIMEOUT %ERRORS &print_revision &support);
+
+@ARGV == 3 or die "usage: my_check_process.pl process_name number_of_processes status_level\n";
+
+# process_name - name of the process to be checked.
+# number_of_processes - exact number of running processes expected.
+# status_level - the STATUS (CRITICAL or WARNING) to return if the check fails.
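+#
+# example call (process name and count are hypothetical):
+#   my_check_process.pl daq_evtbuild 2 WARNING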
+
+my ($process_name, $proc_num, $status_level) = @ARGV;
+
+my $pids = `pidof -x $process_name`;
+chop($pids);
+
+#print "pids = $pids\n";
+my @pid_list = split(' ',$pids);
+
+my $run_proc_num = $#pid_list+1; #number of running processes with name $process_name
+
+my $state;
+
+if($pids){
+ $state = $ERRORS{'OK'};
+}
+else{
+ $state = $ERRORS{'CRITICAL'};
+}
+
+if($state == $ERRORS{'OK'}){
+ if($proc_num == $run_proc_num){
+ print "OK - pid of $process_name is $pids\n";
+
+ exit $state;
+ }
+ elsif($proc_num > $run_proc_num){
+ print "$status_level - pid of $process_name is $pids, too few processes!\n";
+
+ #the following is needed because of passed status_level to the plugin script.
+ if($status_level eq "CRITICAL") {
+ exit $ERRORS{'CRITICAL'};
+ }
+ elsif($status_level eq "WARNING") {
+ exit $ERRORS{'WARNING'};
+ }
+ }
+ else{
+ print "$status_level - pid of $process_name is $pids, too many processes!\n";
+
+ #the following is needed because of passed status_level to the plugin script.
+ if($status_level eq "CRITICAL") {
+ exit $ERRORS{'CRITICAL'};
+ }
+ elsif($status_level eq "WARNING") {
+ exit $ERRORS{'WARNING'};
+ }
+ }
+}
+elsif($state == $ERRORS{'CRITICAL'}){
+ print "$status_level - $process_name is not running!\n";
+
+ #the following is needed because of passed status_level to the plugin script.
+ if($status_level eq "CRITICAL") {
+ exit $ERRORS{'CRITICAL'};
+ }
+ elsif($status_level eq "WARNING") {
+ exit $ERRORS{'WARNING'};
+ }
+}
+
+#fallback: report UNKNOWN if an unexpected status_level was passed
+exit $ERRORS{'UNKNOWN'};
+
+
+
--- /dev/null
+#!/bin/sh
+#
+##############################################################################
+##############################################################################
+## Nagios plugin to check EPICS PV Status ##
+##############################################################################
+##############################################################################
+#
+# Script to retrieve EPICS PV Name status using the "caget" command.
+# Written by Mauro Giacchini (mauro.giacchini@lnl.infn.it)
+# Last Modified: 17-11-2007
+#
+# Usage: ./check_caget.sh -pv <PV name>
+#
+# Description:
+# This script uses caget command to retrieve the PV status.
+#
+# Limitations:
+# This script has been tested on Linux Fedora Core 6.
+#
+# Output:
+# The output contains the "te" time elapsed,
+# calculated as the difference between the PV's
+# timestamp and the local "date" command (suggestion: use a common ntp server
+# for the IOCs and the Nagios server box). The STATUS of the service (i.e. of the PV)
+# follow the severity rules:
+#
+# Severity (none) >>>> STATE_OK # OK = green
+#
+# Severity MINOR >>>> STATE_WARNING # WARNING = yellow
+#
+# Severity MAJOR >>>> STATE_CRITICAL # CRITICAL = red
+#
+# PV not found >>>> STATE_UNKNOWN # UNKNOWN = orange
+#
+# In case of Severity (none) it shows the stdout of
+# "caget -a" with the "te" appended.
+#
+# Other notes:
+# Firefox Plugin : A Firefox extension is available to monitor the Nagios server.
+# https://addons.mozilla.org/it/firefox/addon/3607
+#
+# Nagios configuration setup:
+# You need to add the command to commands.cfg
+#
+# define command{
+# command_name check_caget
+# command_line $USER1$/check_caget.sh -pv $ARG1$
+# }
+#
+# And, you need to add the service to services.cfg
+#
+# define service{
+# use generic-service ;
+# host_name IOC_Example ;
+# service_description aiExample ;
+# is_volatile 0 ;
+# check_period 24x7 ;
+# max_check_attempts 3 ;
+# normal_check_interval 3 ;
+# retry_check_interval 1 ;
+# contact_groups admins ;
+# notification_interval 120 ;
+# notification_period 24x7 ;
+# notification_options w,u,c,r ;
+# check_command check_caget!rootHost:aiExample ;
+# }
+#
+# then place this script in the /usr/lib/nagios/plugins/
+# on the Nagios box server.
+# Don't forget to set the right execution permission to this file.
+#
+# Threshold and ranges: please, have a look at:
+# http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
+#
+# Last: This script still needs debugging and fixups (exercise for reader) :-)
+#
+##############################################################################
+# DEBUGGING OPTION
+# This option determines whether or not debugging messages are showed
+# Values: 0=debugging off, 1=debugging on
+
+DEBUG="0"
+
+
+##############################################################################
+# CAGET LOCATION
+# This option determines where the caget executable is located.
+# The default /usr/bin/caget can be a symbolic link
+# created by root, e.g.: ln -s /opt/epics/base-3.14.9/bin/linux-x86/caget /usr/bin/caget
+
+
+CAGET_LOCATION=/home/scs/epics/base-3.14.9/bin/linux-x86/caget
+
+
+##############################################################################
+# Script exit status
+
+STATE_OK=0 # OK = green
+
+STATE_WARNING=1 # WARNING = yellow
+
+STATE_CRITICAL=2 # CRITICAL = red
+
+STATE_UNKNOWN=3 # UNKNOWN = orange
+
+VERSION="v1.3"
+
+##############################################################################
+# print_revision() function
+
+print_revision (){
+
+ echo "Check_caget (nagios-plugins 1.4 to nagios 2.9) (EPICS base 3.14.9) $VERSION"
+}
+
+##############################################################################
+# print_usage() function
+
+print_usage() {
+
+ echo ""
+ echo "Usage: check_caget.sh -pv <PV name> "
+ echo "Usage: check_caget.sh -pv <PV name> -H <EPICS_CA_ADDR_LIST>"
+ echo "Usage: check_caget.sh -pv <PV name> -p <EPICS_CA_SERVER_PORT>"
+ echo "Usage: check_caget.sh -pv <PV name> -expval <EXPECTED VALUE>"
+ echo "Usage: check_caget.sh [-h] [--help]"
+ echo "Usage: check_caget.sh [-V]"
+ echo ""
+}
+
+#####################################################################################
+# print_help() function
+
+print_help() {
+ echo ""
+ print_usage
+ echo ""
+ echo "Script to retrieve the PV status for EPICS control systems."
+ echo ""
+ echo "This plugin was not developed by the Nagios Plugins group."
+ echo "Please do not e-mail them for support on this plugin, since"
+ echo "they won't know what you're talking about :P"
+ echo ""
+ echo "For contact info: mauro.giacchini@lnl.infn.it"
+ echo "Download : http://www.lnl.infn.it/~epics/"
+ echo ""
+}
+
+##############################################################################
+# Check the caget presence.
+
+
+verify_caget_presence() {
+
+
+if ! type $CAGET_LOCATION >/dev/null 2>&1; then
+
+ echo "STATUS CRITICAL: caget not found (check CAGET_LOCATION and the permissions of the Nagios user)"
+ exit $STATE_CRITICAL
+fi
+}
+
+
+##############################################################################
+# Control caget plugin input parameters
+
+EXPVAL=""
+EPICS_CA_ADDR_LIST="" # Default YES
+EPICS_CA_SERVER_PORT="" # Default 5064 _and_ value > 5000
+EPICS_CA_SERVER_PORT_MIN="5000"
+
+while test -n "$1"; do
+
+ case "$1" in
+
+ --help)
+ print_help
+ exit $STATE_OK
+ ;;
+
+ -h)
+ print_help
+ exit $STATE_OK
+ ;;
+
+ -V)
+ print_revision
+ exit $STATE_OK
+ ;;
+
+ -pv)
+ PVNAME=$2
+ shift
+ ;;
+
+ -expval)
+ EXPVAL=$2
+ if [ -z $EXPVAL ]; then
+ echo "STATUS CRITICAL: Expected value absent"
+ exit $STATE_CRITICAL
+ fi
+ shift
+ ;;
+
+ -H)
+ EPICS_CA_ADDR_LIST=$2
+ if [ -z $EPICS_CA_ADDR_LIST ]; then
+ echo "STATUS CRITICAL: Expected EPICS_CA_ADDR_LIST absent"
+ exit $STATE_CRITICAL
+ fi
+ export EPICS_CA_ADDR_LIST
+ EPICS_CA_AUTO_ADDR_LIST="NO"
+ export EPICS_CA_AUTO_ADDR_LIST
+ shift
+ ;;
+
+ -p)
+ EPICS_CA_SERVER_PORT=$2
+ if [ -z $EPICS_CA_SERVER_PORT ]; then
+ echo "STATUS CRITICAL: Expected EPICS_CA_SERVER_PORT absent"
+ exit $STATE_CRITICAL
+ fi
+ if [ $EPICS_CA_SERVER_PORT -le $EPICS_CA_SERVER_PORT_MIN ]; then
+ echo "STATUS CRITICAL: EPICS_CA_SERVER_PORT must be greater than $EPICS_CA_SERVER_PORT_MIN"
+ exit $STATE_CRITICAL
+ fi
+ export EPICS_CA_SERVER_PORT
+ shift
+ ;;
+
+ *)
+ echo ""
+ echo "Unknown argument: $1"
+ print_usage
+ exit $STATE_UNKNOWN
+ ;;
+
+esac
+shift
+done
+
+
+verify_caget_presence
+
+if [ -z $PVNAME ]; then
+
+ echo "STATUS CRITICAL: PV Name not specified"
+ exit $STATE_CRITICAL
+fi
+
+#####################################################################################
+# FINALLY... RETRIEVING THE VALUES (caget)
+
+
+#CAGET_REPLY=`caget -a $PVNAME`
+CAGET_REPLY=`$CAGET_LOCATION -a $PVNAME`
+
+IFS=" "
+read pvname date time value status severity<<END
+$CAGET_REPLY
+END
+
+if [ -z $pvname ]; then
+
+ echo "STATE_UNKNOWN: $PVNAME not found"
+ exit $STATE_UNKNOWN
+ fi
+
+##############################################################################
+# Calculate the difference between the PV timestamp and the current time
+
+ SPACE=" "
+ dte1=$(date --date "$date$SPACE$time" +%s)
+ dte2=$(date +%s)
+ diffSec=$((dte2-dte1))
+ if ((diffSec < 0)); then abs=-1; else abs=1; fi
+ te=$((diffSec/abs))
+# echo "Time elapsed (sec.): $te"
+
+##############################################################################
+# Output the NAGIOS status using an expected value
+
+if [ $EXPVAL ]; then
+
+ if [[ $value -eq $EXPVAL ]] ;
+ then echo "STATE_OK: Expected value ($EXPVAL) to $pvname match; te: $te sec."
+ exit $STATE_OK;
+ else echo "STATUS CRITICAL: Expected value ($EXPVAL) to $pvname didn't match"
+ exit $STATE_CRITICAL;
+ fi
+fi
+
+##############################################################################
+# Output the NAGIOS status using the Severity field
+case $severity in
+
+ MAJOR)
+ echo "STATUS CRITICAL: $pvname in MAJOR severity status; te: $te sec."
+ exit $STATE_CRITICAL
+ ;;
+
+ MINOR)
+ echo "STATE_WARNING: $pvname in MINOR severity status; te: $te sec."
+ exit $STATE_WARNING
+ ;;
+
+ *)
+ echo "STATE_OK: $pvname $value $date $time $status ; te: $te sec."
+ exit $STATE_OK
+ ;;
+esac
+
+