#!/usr/bin/env python
# $Id: checkmdstate.py 7 2010-03-14 17:57:08Z coolcold $
# This script checks /proc/mdstat and complains in Nagios manner about
# degraded arrays.
# raid0, which can't be degraded, is not supported yet.
# The script can be run locally or via SNMP, for example.
#
# Written to run unchanged on Python 2.6+ and Python 3.
"""Nagios-style check for degraded Linux md RAID arrays in /proc/mdstat."""

import errno
import sys

# Exit codes understood by Nagios.
nagios_statuses = {"OK": 0, "WARNING": 1, "CRITICAL": 2, "UNKNOWN": 3}

# File to inspect, and the marker identifying the per-array "blocks" line.
mdstatfile = "/proc/mdstat"
blockstring = " blocks "


def read_mdstat(path):
    """Read the mdstat file at *path*.

    Returns a ``(content, exit_code, message)`` tuple.  ``content`` is the
    file text on success and ``None`` whenever the caller should just print
    ``message`` and exit with ``exit_code``:

    * file missing      -> exit code OK (no RAID on the box is not an error;
                           should be configurable)
    * other IOError     -> CRITICAL
    * any other failure -> UNKNOWN (including a failing close())
    """
    content = None
    code = nagios_statuses["OK"]
    message = "OK"
    handle = None
    try:
        handle = open(path, "r")
        content = handle.read()
    except IOError as err:
        details = getattr(err, "args", "Unknown error")
        if err.errno == errno.ENOENT:
            message = ("file %s not found, assuming no RAIDs on the box "
                       "and it's ok" % path)
        else:
            message = "error occured while reading %s : %s" % (
                path, str(details))
            code = nagios_statuses["CRITICAL"]
    except Exception as err:
        details = getattr(err, "args", "Unknown error")
        message = "UNKNOWN error occured while reading %s : %s" % (
            path, str(details))
        code = nagios_statuses["UNKNOWN"]

    # Always release the handle, even when read() failed.
    if handle is not None:
        try:
            handle.close()
        except Exception as err:
            # Only report the close failure when nothing went wrong earlier;
            # otherwise the read error is the more useful message.
            if content is not None:
                details = getattr(err, "args", "Unknown error")
                content = None
                message = "UNKNOWN error occured while closing %s : %s" % (
                    path, str(details))
                code = nagios_statuses["UNKNOWN"]

    return content, code, message


def parse_mdstat(content):
    """Extract per-array member status from mdstat text.

    Returns ``(reports, degraded)``: ``reports`` is a list of human-readable
    status strings in file order, ``degraded`` is True when at least one
    array has a member flag other than ``U``.

    Only lines containing ``blockstring`` and a bracketed group are looked
    at; the last bracketed group on such a line is taken as the member
    status (e.g. ``[UU_]``) and the first word of the preceding line as the
    array name.  Lines without any ``[`` (such as raid0) are skipped.
    """
    lines = content.split("\n")
    reports = []
    degraded = False
    for index, line in enumerate(lines):
        if blockstring not in line:
            continue
        open_at = line.rfind("[")
        if open_at == -1:
            continue
        # Find the matching "]" rather than assuming it is the last
        # character: trailing blanks or "\r" must not corrupt the flags.
        close_at = line.find("]", open_at)
        if close_at == -1:
            flags = line[open_at + 1:].rstrip()
        else:
            flags = line[open_at + 1:close_at]
        # The array name sits on the line just above; do not wrap around
        # to the end of the file when the very first line matches.
        previous = lines[index - 1] if index > 0 else ""
        mdname = previous.split(" ")[0]
        if flags == "U" * len(flags):
            reports.append("%s ok [%s]" % (mdname, flags))
        else:
            reports.append("%s NOT OK [%s]" % (mdname, flags))
            degraded = True
    return reports, degraded


def check_mdstat(content):
    """Return ``(exit_code, message)`` for the given mdstat text.

    The exit code is CRITICAL when any array is degraded and OK otherwise;
    the message is the status word followed by the comma-separated
    per-array reports.
    """
    reports, degraded = parse_mdstat(content)
    if degraded:
        code = nagios_statuses["CRITICAL"]
        prefix = "CRITICAL:"
    else:
        code = nagios_statuses["OK"]
        prefix = "OK"
    # rstrip() keeps the line clean when no array was reported at all.
    message = (prefix + " " + ", ".join(reports)).rstrip()
    return code, message


def main():
    """Run the check against ``mdstatfile``, print the result and exit."""
    content, code, message = read_mdstat(mdstatfile)
    if content is not None:
        code, message = check_mdstat(content)
    # Single-argument print() behaves the same on Python 2 and 3.
    print(message)
    sys.exit(code)


if __name__ == "__main__":
    main()