#!/usr/bin/env python

# Author: Troels Arvin
#
# $Revision: 12707 $
# $Date: 2010-04-21 00:03:21 +0200 (Wed, 21 Apr 2010) $
#
# Copyright (c) 2009, Danish National Board of Health.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the the Danish National Board of Health nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY the Danish National Board of Health ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL the Danish National Board of Health BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
This nagios plugin requires that the pymssql Python module be installed.
The plugin was tested with version 1.0.1 of the pymssql Python module on
a Red Hat 5 installation, communicating with version 2005 of MSSQL.

Before using the plugin, set up a table like this in the database to be
monitored (the name of the table is up to you, but by default, the plugin
expects a table called 'write_test', in a schema called 'nagios'):

  CREATE TABLE nagios.write_test (
      last_write DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
  )

Set permissions such that the Nagios plugin may log in with a relevant
user ID and perform DELETE+INSERT on the table.

The following Nagios example configuration assumes:
 - the database server's name is 'somehost'
 - the database being monitored is called 'nagios'
 - a plain/non-AD user called 'nagios' exists in the database
 - the user's password is sEcReT
 - the plugin script is placed in a directory called /usr/local/nagios

First, define a command:

  define command{
      command_name    check_mssql_write
      command_line    /usr/local/nagios/check_mssql_write.py -H $HOSTADDRESS$ -a sEcReT
  }

The command may now be used in a service definition:

  define service{
      service_description     Writes in database
      host_name               somehost
      check_command           check_mssql_write
      use                     generic-service
  }

The plugin should be supplemented by a check of basic TCP port 1433
connectivity (check_tcp) to the database, possibly in a service dependency
relationship, in order to be able to differentiate between real database
issues, and cases like firewall mis-configuration, etc.

Exit status: 0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN).
"""

import sys
import os
import getopt
import time

# =====================================
# Defaults-handling
# =====================================
default_port = 1433
default_database = 'nagios'
default_username = 'nagios'
default_tablename = 'nagios.write_test'
default_warning_sec = 5
default_critical_sec = 60

# Exit statuses understood by Nagios.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

this_script = os.path.basename(__file__)

# NOTE: the code below deliberately sticks to syntax that is valid on both
# Python 2 and Python 3 (single-argument print(...), sys.exc_info() instead
# of "except X, e" / "except X as e").


# =====================================
# Parameter handling
# =====================================
def usage(exit_error=True):
    """Print the command-line synopsis and the default values.

    exit_error -- when true (the default), terminate the process with exit
                  status 3 (UNKNOWN) after printing.
    """
    print("""Usage:
  %s [-h|--help]
  %s -H <hostname> -a <password> [-u <username>] [-p <port>]
     [-d <database>] [-t <tablename>] [-w sec] [-c sec]

Default values:
  username     : %s
  port         : %d
  database     : %s
  tablename    : %s
  warning_sec  : %d
  critical_sec : %d""" % (this_script, this_script, default_username,
                          default_port, default_database, default_tablename,
                          default_warning_sec, default_critical_sec))
    if exit_error:
        sys.exit(STATE_UNKNOWN)


def parse_arguments(argv):
    """Turn the command-line arguments into a settings dictionary.

    argv -- list of arguments, without the program name.

    Returns a dict with the keys 'hostname', 'password', 'username', 'port',
    'database', 'tablename', 'warning_sec' and 'critical_sec'; options not
    given on the command line carry their default value.

    Any problem with the arguments (unknown option, non-integer value for
    -p/-w/-c, missing -H or -a, warning > critical) prints a message plus
    the usage text and exits with status 3 (raises SystemExit).
    """
    string_options = {
        '-H': 'hostname',
        '-u': 'username',
        '-a': 'password',
        '-d': 'database',
        '-t': 'tablename',
    }
    integer_options = {
        '-p': 'port',
        '-w': 'warning_sec',
        '-c': 'critical_sec',
    }
    settings = {
        'hostname': None,
        'password': None,
        'username': default_username,
        'port': default_port,
        'database': default_database,
        'tablename': default_tablename,
        'warning_sec': default_warning_sec,
        'critical_sec': default_critical_sec,
    }

    try:
        options, args = getopt.getopt(argv, "H:u:a:p:d:t:w:c:h", ["help"])
    except getopt.GetoptError:
        usage()

    for name, value in options:
        if name in ("-h", "--help"):
            usage()
        elif name in string_options:
            settings[string_options[name]] = value
        elif name in integer_options:
            try:
                settings[integer_options[name]] = int(value)
            except ValueError:
                # Report a bad number as an argument error instead of
                # letting it surface as an anonymous "unhandled error".
                print("Parameter '%s' requires an integer value, not '%s'."
                      % (name, value))
                print("")
                usage()
        else:
            print("Unknown parameter '%s'." % name)
            print("")
            usage()

    if settings['hostname'] is None or settings['password'] is None:
        print("At least one required parameter missing.")
        print("")
        usage()

    if settings['warning_sec'] > settings['critical_sec']:
        print("warning seconds (%d) may not be greater than critical seconds (%d)."
              % (settings['warning_sec'], settings['critical_sec']))
        usage()

    return settings


# =====================================
# Real work: Connect, and delete+insert
# =====================================
def timed_write(pymssql, settings):
    """Connect, perform DELETE+INSERT+commit, and return the elapsed seconds.

    pymssql  -- the imported pymssql module (passed in so that the import
                can be handled separately by the caller).
    settings -- dictionary as returned by parse_arguments().

    The returned time covers the connect AND the write, which is what the
    status messages and the performance data report.

    A connection failure is reported here and ends the process: exit status
    2 for a timeout, 3 for any other connection error.  A
    pymssql.DatabaseError raised while writing is propagated to the caller;
    the connection is closed in either case.
    """
    started = time.time()
    try:
        conn = pymssql.connect(
            host='%s:%d' % (settings['hostname'], settings['port']),
            database=settings['database'],
            user=settings['username'],
            password=settings['password'],
            timeout=settings['critical_sec'],
            login_timeout=settings['critical_sec'])
    except pymssql.DatabaseError:
        # We need to be able to differentiate between timeouts and
        # other errors, such as password errors
        details = str(sys.exc_info()[1])
        if 'error message 20003' in details or 'timed out' in details:
            print("MSSQL CRITICAL: connection timeout")
            sys.exit(STATE_CRITICAL)
        print("MSSQL UNKNOWN: Database connection error: %s"
              % ' '.join(details.split()))
        sys.exit(STATE_UNKNOWN)

    try:
        curs = conn.cursor()
        # The table name is an identifier and cannot be passed as a query
        # parameter; it is interpolated as-is, so -t must only ever be set
        # from trusted Nagios configuration.
        curs.execute('DELETE FROM %s' % settings['tablename'])
        curs.execute('INSERT INTO %s VALUES(DEFAULT)' % settings['tablename'])
        conn.commit()
        # Stop the clock only after the commit, so that the write is
        # actually part of the measured time.
        elapsed = time.time() - started
    finally:
        # Release the connection even when the write failed.
        conn.close()
    return elapsed


# =====================================
# How did it go, time-wise?
# =====================================
def evaluate_timing(elapsed, warning_sec, critical_sec):
    """Map the elapsed time onto an exit status and a status line.

    Returns a tuple (exit_status, message).  The OK message carries the
    performance numbers; WARNING and CRITICAL messages name the threshold
    that was exceeded.
    """
    if elapsed > critical_sec:
        return (STATE_CRITICAL,
                "MSSQL CRITICAL: Connect+write time (%f) was longer than %d seconds"
                % (elapsed, critical_sec))
    if elapsed > warning_sec:
        return (STATE_WARNING,
                "MSSQL WARNING: Connect+write time (%f) was longer than %d seconds"
                % (elapsed, warning_sec))
    return (STATE_OK,
            "MSSQL OK: Connect+write time: %f sec|time=%fs;%d;%d;0"
            % (elapsed, elapsed, warning_sec, critical_sec))


def main(argv=None):
    """Run the check and terminate with the matching exit status.

    argv -- list of arguments without the program name; defaults to
            sys.argv[1:].

    Always ends by raising SystemExit with status 0, 1, 2 or 3.
    """
    if argv is None:
        argv = sys.argv[1:]

    # (Most of the script is wrapped in a big try-block, so that
    # errors in the script are treated in a way which is interpreted
    # as an UNKNOWN state by Nagios.)
    try:
        settings = parse_arguments(argv)

        # The import has its own handler: no "except pymssql...." clause
        # may be evaluated before the name pymssql actually exists.
        try:
            import pymssql
        except ImportError:
            print("MSSQL UNKNOWN: The pymssql Python module is not installed.")
            sys.exit(STATE_UNKNOWN)

        try:
            elapsed = timed_write(pymssql, settings)
        except pymssql.DatabaseError:
            print("MSSQL CRITICAL: Database exception: %s"
                  % ' '.join(str(sys.exc_info()[1]).split()))
            sys.exit(STATE_CRITICAL)

        status, message = evaluate_timing(elapsed,
                                          settings['warning_sec'],
                                          settings['critical_sec'])
        print(message)
        sys.exit(status)
    except SystemExit:
        # Exit requests from the code above must pass through untouched.
        raise
    except Exception:
        # At this point, we don't know what's going on, so let's
        # not output the details of the error into something which
        # would appear in the Nagios web interface.
        print("MSSQL UNKNOWN: An unhandled error occurred. ")
        sys.stderr.write('Unhandled error: %s' % (sys.exc_info()[1],))
        sys.exit(STATE_UNKNOWN)


if __name__ == '__main__':
    main()