nagios-2.5-24 (Network Monitoring)

27/Dec/2006 tested on openSUSE 10.2

Nagios is open source software for monitoring hosts, services, and network resources. It monitors the hosts and services that you specify, alerting you when things go bad and when they get better.

RPMs are available for openSUSE 10.2. However, they are not included on the CD or DVD; they are only available from the Internet installation repository.

Installation

  • Install the RPMs from the Internet installation repository. Open YaST, Software, Installation Source and add the Internet installation repository. Look up a mirror URL at http://en.opensuse.org/Mirrors_Released_Version
  • Open Software Management in YaST, search for "nagios", and install all of the matching packages. You will be installing the following packages.
    nagios-2.5-24.i586.rpm
    nagios-nrpe-2.5.2-28.i586.rpm
    nagios-nsca-2.5-39.i586.rpm
    nagios-plugins-1.4.5-5.i586.rpm
    nagios-plugins-extras-1.4.5-5.i586.rpm
    nagios-www-2.5-24.i586.rpm
    nagiosgrapher-1.6.1RC1-6.i586.rpm
    

    Configure Web Interface

    Edit /etc/apache2/conf.d/nagios.conf, and allow access from the networks that should be able to reach the web interface.
    ScriptAlias /nagios/cgi-bin/ /usr/lib/nagios/cgi/
    <Directory /usr/lib/nagios/cgi/>
       Options ExecCGI
       order deny,allow
       deny from all
       allow from 127.0.0.1 192.168.0.0/24
       AuthName "Nagios Access"
       AuthType Basic
       AuthUserFile /etc/nagios/htpasswd.users
       require valid-user
    </Directory>
    
    Alias /nagios/ /usr/share/nagios/
    <Directory /usr/share/nagios/>
       Options None
       order deny,allow
       deny from all
       allow from 127.0.0.1 192.168.0.0/24
       AuthName "Nagios Access"
       AuthType Basic
       AuthUserFile /etc/nagios/htpasswd.users
       require valid-user
    </Directory>
    
    Edit /etc/apache2/conf.d/nagat.conf, and apply the same access restrictions.
    Alias /nagat/ /usr/share/nagat/
    <Directory /usr/share/nagat/>
       Options None
       order deny,allow
       deny from all
       allow from 127.0.0.1 192.168.0.0/24
       php_flag register_globals on
    </Directory>
    
    The following command creates the password file with a username and password for nagiosadmin. If you change the username, you must change the corresponding lines in cgi.cfg for that user as well.
    # htpasswd2 -c /etc/nagios/htpasswd.users nagiosadmin
    
    Assign a password for the guest user
    # htpasswd2 /etc/nagios/htpasswd.users guest
    
    Edit /etc/nagios/cgi.cfg, and make sure use_authentication=1
    ...
    use_authentication=1
    ...
    default_user_name=guest
    ...
    authorized_for_system_information=nagiosadmin
    ...
    authorized_for_configuration_information=nagiosadmin
    ...
    authorized_for_system_commands=nagiosadmin
    ...
    authorized_for_all_services=nagiosadmin,guest
    authorized_for_all_hosts=nagiosadmin,guest
    ...
    authorized_for_all_service_commands=nagiosadmin
    authorized_for_all_host_commands=nagiosadmin
    ...
    
    Restart Apache
    # /etc/init.d/apache2 restart
    

    Configuration

  • This is an example network.
    srv1(nagios) (192.168.66.128)
         |
       firewall (192.168.254.254)-- isp1 (85.115.193.253) -- isp2 (213.248.68.33)
         |
    winxp1(192.168.66.1)
    
  • In this example, we use only one configuration file (minimal.cfg). However, the configuration can also be divided into several files.
  • A sensible order in which to configure things is,
    1. contact, and contact group
    2. host, and host group
    3. service
    4. If you need more services, check available plugin and define commands
  • Edit /etc/nagios/minimal.cfg as,
    # This defines a timeperiod where all times are valid for checks, 
    # notifications, etc.  The classic "24x7" support nightmare. :-)
    
    define timeperiod{
            timeperiod_name 24x7
            alias           24 Hours A Day, 7 Days A Week
            sunday          00:00-24:00
            monday          00:00-24:00
            tuesday         00:00-24:00
            wednesday       00:00-24:00
            thursday        00:00-24:00
            friday          00:00-24:00
            saturday        00:00-24:00
            }
    
    ###############################################################################
    # COMMANDS
    ###############################################################################
    
    # This is a sample service notification command that can be used to send email 
    # notifications (about service alerts) to contacts.
    
    define command{
    	command_name	notify-by-email-disabled
    	command_line	/usr/bin/printf "%b" "***** Nagios  *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /usr/bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
    	}
    
    # This is a sample host notification command that can be used to send email 
    # notifications (about host alerts) to contacts.
    
    define command{
    	command_name	host-notify-by-email-disabled
    	command_line	/usr/bin/printf "%b" "***** Nagios  *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
    	}
    
    # Command to check to see if a host is "alive" (up) by pinging it
    
    define command{
            command_name    check-host-alive-disabled
            command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 99,99% -c 100,100% -p 1 
            }
    
    # Generic command to check a device by pinging it
    
    define command{
    	command_name	check_ping-disabled
    	command_line	$USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
    	}
    
    # Command used to check disk space usage on local partitions
    
    define command{
    	command_name	check_local_disk-disabled
    	command_line	$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
    	}
    
    # Command used to check the number of currently logged in users on the
    # local machine
    
    define command{
    	command_name	check_local_users-disabled
    	command_line	$USER1$/check_users -w $ARG1$ -c $ARG2$
    	}
    
    # Command to check the number of running processes on the local machine
    
    define command{
    	command_name	check_local_procs-disabled
    	command_line	$USER1$/check_procs -w $ARG1$ -c $ARG2$
    	}
    
    # Command to check the load on the local machine
    
    define command{
    	command_name	check_local_load-disabled
    	command_line	$USER1$/check_load -w $ARG1$ -c $ARG2$
    	}
    
    
    # Command to check the proxy
    define command{
            command_name    check_proxy
            command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p 8080
            }
    
    ###############################################################################
    # CONTACTS
    ###############################################################################
    
    # In this simple config file, a single contact will receive all alerts.
    # This assumes that mail addressed to root@localhost is delivered on the
    # local host.
    
    define contact{
            contact_name                    nagios-admin
            alias                           Nagios Admin
            service_notification_period     24x7
            host_notification_period        24x7
            service_notification_options    w,u,c,r
            host_notification_options       d,r
            service_notification_commands   notify-by-email
            host_notification_commands      host-notify-by-email
            email                           root@localhost
            }
    
    ###############################################################################
    # CONTACT GROUPS
    ###############################################################################
    
    # We only have one contact in this simple configuration file, so there is
    # no need to create more than one contact group.
    
    define contactgroup{
            contactgroup_name       admins
            alias                   Nagios Administrators
            members                 nagios-admin
            }
    
    ###############################################################################
    # HOSTS
    ###############################################################################
    
    # Generic host definition template - This is NOT a real host, just a template!
    
    define host{
            name                            generic-host    ; The name of this host template
            notifications_enabled           1       ; Host notifications are enabled
            event_handler_enabled           1       ; Host event handler is enabled
            flap_detection_enabled          1       ; Flap detection is enabled
            failure_prediction_enabled      1       ; Failure prediction is enabled
            process_perf_data               1       ; Process performance data
            retain_status_information       1       ; Retain status information across program restarts
            retain_nonstatus_information    1       ; Retain non-status information across program restarts
            register                        0       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
            max_check_attempts      10
            check_period		24x7
            notification_interval   120
            notification_period     24x7
            notification_options    d,r
            contact_groups  admins
            }
    
    # The hosts of the example network. srv1 is the local host (this machine),
    # so it is addressed via the loopback address.
    
    define host{
            use                     generic-host            ; Name of host template to use
            host_name               srv1
            alias                   srv1 (192.168.66.128)
            address                 127.0.0.1
            check_command           check-host-alive
            }
    
    define host{
            use                     generic-host
            host_name               winxp1
            alias                   winxp1(192.168.66.1)
            address                 192.168.0.1
            check_command           check-host-alive
            }
    
    define host{
            use                     generic-host
            host_name               firewall
            alias                   firewall (192.168.254.254)
            address                 192.168.254.254
            check_command           check-host-alive
            }
    
    define host{
            use                     generic-host
            host_name               isp1
            alias                   isp1 (85.115.193.253)
            address                 85.115.193.253
    	parents			firewall
            check_command           check-host-alive
            }
    
    define host{
            use                     generic-host
            host_name               isp2
            alias                   isp2 (213.248.68.33)
            address                 213.248.68.33
    	parents			isp1
            check_command           check-host-alive
            }
    
    ###############################################################################
    # HOST GROUPS
    ###############################################################################
    
    # The example hosts are divided into two host groups: the local network
    # nodes and the ISP nodes.
    
    define hostgroup{
            hostgroup_name  local
            alias           network nodes
            members         firewall,srv1,winxp1
            }
    
    define hostgroup{
            hostgroup_name  isp
            alias           isp network nodes
            members         isp1,isp2
            }
    
    ###############################################################################
    # SERVICES
    ###############################################################################
    
    # Generic service definition template - This is NOT a real service, just a template!
    
    define service{
            name                            generic-service ; The 'name' of this service template
            active_checks_enabled           1       ; Active service checks are enabled
            passive_checks_enabled          1       ; Passive service checks are enabled/accepted
            parallelize_check               1       ; Active service checks should be parallelized (disabling this can lead to major performance problems)
            obsess_over_service             1       ; We should obsess over this service (if necessary)
            check_freshness                 0       ; Default is to NOT check service 'freshness'
            notifications_enabled           1       ; Service notifications are enabled
            event_handler_enabled           1       ; Service event handler is enabled
            flap_detection_enabled          1       ; Flap detection is enabled
            failure_prediction_enabled      1       ; Failure prediction is enabled
            process_perf_data               1       ; Process performance data
            retain_status_information       1       ; Retain status information across program restarts
            retain_nonstatus_information    1       ; Retain non-status information across program restarts
            register                        0       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
            is_volatile                     0
            check_period                    24x7
            max_check_attempts              4
            normal_check_interval           5
            retry_check_interval            1
            contact_groups                  admins
            notification_options            w,u,c,r
            notification_interval           960
            notification_period             24x7
            }
    
    # Define a service to "ping" the local machine
    
    define service{
            use                             generic-service         ; Name of service template to use
            host_name                       srv1
            service_description             PING
    	check_command			check_ping!100.0,20%!500.0,60%
            }
    
    define service{
            use                             generic-service
            host_name                       firewall
            service_description             PING
            check_command                   check_ping!100.0,20%!500.0,60%
            }
    
    define service{
            use                             generic-service
            host_name                       winxp1
            service_description             PING
            check_command                   check_ping!100.0,20%!500.0,60%
            }
    
    define service{
            use                             generic-service
            host_name                       isp1
            service_description             PING
            check_command                   check_ping!100.0,20%!500.0,60%
            }
    
    define service{
            use                             generic-service
            host_name                       isp2
            service_description             PING
            check_command                   check_ping!100.0,20%!500.0,60%
            }
    
    # Define a service to check the disk space of the root partition
    # on the local machine.  Warning if < 20% free, critical if
    # < 10% free space on partition.
    
    define service{
            use                             generic-service         ; Name of service template to use
            host_name                       srv1
            service_description             Root Partition
    	check_command			check_local_disk!20%!10%!/
            }
    
    # Define a service to check the number of currently logged in
    # users on the local machine.  Warning if > 20 users, critical
    # if > 50 users.
    
    define service{
            use                             generic-service         ; Name of service template to use
            host_name                       srv1
            service_description             Current Users
    	check_command			check_local_users!20!50
            }
    
    # Define a service to check the load on the local machine. 
    
    define service{
            use                             generic-service         ; Name of service template to use
            host_name                       srv1
            service_description             Current Load
    	check_command			check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
            }
    
    # Other services
    
    define service{
            use                             generic-service
            host_name                       srv1
            service_description             DNS
            check_command                   check_dns
            }
    
    define service{
            use                             generic-service
            host_name                       srv1
            service_description             HTTP
            check_command                   check_http
            }
    
    define service{
            use                             generic-service
            host_name                       srv1
            service_description             FTP
            check_command                   check_ftp
            }
    
    define service{
            use                             generic-service
            host_name                       srv1
            service_description             Proxy
            check_command                   check_proxy
            }
    
    # EOF
    
  • Plug-in

    The following plugins should be available on your system. Plugins are installed under /usr/lib/nagios/plugins/
    check_breeze    check_http          check_mysql   check_procs    check_time
    check_by_ssh    check_icmp          check_nagios  check_radius   check_udp
    check_dhcp      check_ifoperstatus  check_nntp    check_real     check_udp2
    check_dig       check_ifstatus      check_nntps   check_rpc      check_ups
    check_disk      check_imap          check_nrpe    check_sensors  check_users
    check_disk_smb  check_ircd          check_nt      check_simap    check_wave
    check_dns       check_jabber        check_ntp     check_smtp     negate
    check_dummy     check_ldap          check_nwstat  check_snmp     urlize
    check_file_age  check_load          check_oracle  check_spop     utils.pm
    check_flexlm    check_log           check_overcr  check_ssh      utils.sh
    check_fping     check_mailq         check_pgsql   check_ssmtp
    check_ftp       check_mrtg          check_ping    check_swap
    check_hpjd      check_mrtgtraf      check_pop     check_tcp

  • Run a plugin with the -h option to see its help
    # cd /usr/lib/nagios/plugins/
    # ./check_imap -h
    check_IMAP (nagios-plugins 1.4.2) 1.66
    Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>
    Copyright (c) 1999-2004 Nagios Plugin Development Team
            <nagiosplug-devel@lists.sourceforge.net>
    
    This plugin tests IMAP connections with the specified host.
    
    Usage: check_IMAP -H host -p port [-w <warning time>] [-c <critical time>]
                      [-s <send string>] [-e <expect string>] [-q <quit string>]
                      [-m <maximum bytes>] [-d <delay>] [-t <timeout seconds>]
                      [-r <refuse state>] [-M <mismatch state>] [-v] [-4|-6] [-j]
                      [-D <days to cert expiry>] [-S <use SSL>]
    ...
    
    To try out the SSH check from the command line, run,
    # ./check_ssh 192.168.5.1
    SSH OK - OpenSSH_4.2-chrootssh (protocol 1.99)
    
    Edit minimal.cfg or checkcommands.cfg, and define a command for the plugin if one is not already defined. For example,
    define command{
            command_name    check_imap
            command_line    $USER1$/check_imap -H $HOSTADDRESS$
            }
    
    define command{
            command_name    check_ssh
            command_line    $USER1$/check_ssh $HOSTADDRESS$
            }
    
    define command{
            command_name    check_nt_cpu
            command_line    $USER1$/check_nt -H $HOSTADDRESS$ -v CPULOAD -l 60,90,95,120,90,95
            }
    
    define command{
            command_name    check_nt_mem
            command_line    $USER1$/check_nt -H $HOSTADDRESS$ -v MEMUSE
            }
    ...
    

    Note: For the check_nt plugin, you need to install NSClient on the Windows PC. Download it from http://nsclient.ready2run.nl/

    Open /etc/nagios/services.cfg, and add the following,
    define service{
            use                             generic-service
            host_name                       srv1
            service_description             IMAP
            check_command                   check_imap
            }
    
    define service{
            use                             generic-service
            host_name                       srv1
            service_description             SSH
            check_command                   check_ssh
            }
    
    define service{
            use                             generic-service
            host_name                       winxp1
            service_description             CPU
            check_command                   check_nt_cpu
            }
    
    define service{
            use                             generic-service
            host_name                       winxp1
            service_description             MEM
            check_command                   check_nt_mem
            }
    

    Run nagios

    Verify the configuration
    # nagios -v nagios.cfg
    
    If there are no errors, you will see,
    Total Warnings: 0
    Total Errors:   0
    
    Things look okay - No serious problems were detected during the pre-flight check
    
  • Run nagios, nrpe, and nagios-nrpe
    # chkconfig nagios on
    # chkconfig nrpe on
    # chkconfig nagios-nrpe on
    # /etc/init.d/nagios restart
    # /etc/init.d/nrpe restart
    # /etc/init.d/xinetd restart
    

    Report

  • Open http://yourhost/nagios/
  • Service Detail
  • Host Detail
  • Status Overview
  • Status Summary
  • Status Grid
  • Status Map
  • 3D Status Map
  • You might need to install a VRML plug-in for your browser to view the 3D Status Map, e.g.,
    1. Windows: Cortona VRML Client (http://www.parallelgraphics.com/products/cortona/)
    2. Linux: FreeWRL (http://freewrl.sourceforge.net/)

  • Back
    Google
    Web www.grape-info.com