企业中对于服务器的常规监控,一般都会部署监控软件系统,如常用的zabbix、ganglia、nagios、observer等。但是对于特殊的业务监控,比如日志中某些关键字出现达到一定次数后即报警通知负责人,或对某些HTTP接口做心跳监控、结果正确性检测等,这些特定需求也需要运维开发相应的脚本来支持。一般监控有变更时都需要通知运维人员来操作,其实我们也可以自己开发脚本实现简单的监控。
import commands
import fcntl
import os
import smtplib
import socket
import struct
import subprocess
import time
from email.mime.text import MIMEText
# Recipients of alert mail; add further addresses as extra list items.
mailto_list = ['david1228@foxmail.com']
# Monitoring rules, keyed by the alert text that is placed in the mail body.
# Fields of each rule:
#   logfile  - log file to inspect
#   limitnum - threshold for the number of matching lines
#   readnum  - how many lines from the end of the log are examined
#   kword    - keyword searched for in those lines
#   sg       - '<' alerts when the count is below the threshold,
#              '>' alerts when it is above
# NOTE(review): the "message error" rule uses sg '<' with limitnum '1', so it
# fires when no matching line is found -- confirm that this is intended.
check_list = {
    'mq:geturl_updatevideo:flush cache OK has a problem, please check!': dict(
        logfile='/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        limitnum='10',
        readnum='200',
        kword='flush cache OK',
        sg='<',
    ),
    'mq:geturl_updatevideo has message error, please check!': dict(
        logfile='/home/ldw/logs/geturl/online/geturl_updatevideo.log',
        limitnum='1',
        readnum='2000',
        kword='message error',
        sg='<',
    ),
}
# Read the IP address of a network interface, given the interface name,
# e.g. eth0, eth1 or bond0 (bonded NICs).
def get_ip_address(ifname):
    """Return the IPv4 address of interface ``ifname`` as a dotted string.

    The address is obtained with the SIOCGIFADDR ioctl on a throw-away UDP
    socket; bytes 20..23 of the ioctl result are converted with
    ``socket.inet_ntoa``.

    ifname: interface name (text or bytes); only the first 15 bytes are used.

    Any error raised by ``fcntl.ioctl`` (for example for an interface that
    does not exist) propagates to the caller.
    """
    # struct.pack('256s', ...) needs a byte string; encode text names first.
    if not isinstance(ifname, bytes):
        ifname = ifname.encode('utf-8')
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        ifreq = fcntl.ioctl(
            s.fileno(),
            0x8915,  # SIOCGIFADDR
            struct.pack('256s', ifname[:15]),
        )
    finally:
        # Release the descriptor even when the ioctl fails.
        s.close()
    return socket.inet_ntoa(ifreq[20:24])
# Send an alert mail.
def send_mail(to_list, sub, content):
    """Send ``content`` as a plain-text mail and report whether it worked.

    The SMTP server and credentials come from the module-level settings
    ``mail_host``, ``mail_user`` and ``mail_pass``. The body is also echoed
    to stdout before sending.

    to_list: list of recipient addresses.
    sub: subject line.
    content: message body.

    Returns True when the message was handed to the server without an
    exception; otherwise prints the error and returns False.
    """
    print(content)
    me = mail_user
    # A charset must be given explicitly so that Chinese text is encoded.
    msg = MIMEText(content, _subtype='plain', _charset='gb2312')
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in mail headers are comma-separated.
    msg['To'] = ", ".join(to_list)
    try:
        server = smtplib.SMTP()
        try:
            server.connect(mail_host)
            server.login(mail_user, mail_pass)
            server.sendmail(me, to_list, msg.as_string())
        finally:
            # Always drop the connection, including after a failed
            # connect/login/send.
            server.close()
        return True
    except Exception as e:
        print(str(e))
        return False
# SMTP account settings used for sending alert mail.
# NOTE(review): the credentials are hard-coded here; consider loading them
# from a protected configuration source instead.
mail_host = "smtp.126.com"
mail_user = "xyz@126.com"
mail_pass = "xyz"
mail_postfix = "126.com"
# Text appended to the "[ip]" prefix that starts every alert body.
content = ""
# A log file is only inspected when it was modified less than this many
# seconds ago.
timeddiff = 300
def _count_matching_lines(logfile, readnum, kword):
    """Count lines matching ``kword`` in the last ``readnum`` lines of ``logfile``.

    Runs ``tail -n readnum logfile | grep -c -e kword`` without a shell, so
    paths and keywords containing spaces or quotes are passed through intact.
    Returns 0 when grep produces no output.
    """
    tail = subprocess.Popen(['tail', '-n', str(readnum), logfile],
                            stdout=subprocess.PIPE)
    try:
        grep = subprocess.Popen(['grep', '-c', '-e', kword],
                                stdin=tail.stdout, stdout=subprocess.PIPE)
        # Drop our copy of the pipe so that grep holds the only read end.
        tail.stdout.close()
        output = grep.communicate()[0]
    finally:
        # Closing twice is harmless; this also covers a failed grep start.
        tail.stdout.close()
        tail.wait()
    return int(output.strip() or 0)


def monitor_list(ethip):
    """Evaluate every rule in ``check_list`` and mail an alert when one fires.

    For each rule, the log file is skipped (with a console message) when it
    is missing or was last modified ``timeddiff`` seconds ago or earlier.
    Otherwise the lines matching the rule's keyword are counted in the tail
    of the file and compared with the threshold in the direction given by
    ``sg``. A firing rule sends a mail to ``mailto_list`` via ``send_mail``;
    a non-firing rule prints a "normal" message.

    ethip: text placed at the start of every alert body (the caller passes
        the bracketed server IP) so the affected host can be identified.
    """
    for k, rule in check_list.items():
        logfile = rule['logfile']
        readnum = rule['readnum']
        limitnum = rule['limitnum']
        kword = rule['kword']
        sg = rule['sg']

        # Only inspect files that were modified recently. A single stat()
        # call replaces the exists()+stat() pair, which could race with log
        # rotation.
        try:
            age = time.time() - os.stat(logfile).st_mtime
        except OSError:
            age = None
        if age is None or age >= timeddiff:
            print(" File has not been updated : " + logfile)
            continue

        count = _count_matching_lines(logfile, readnum, kword)
        if ((sg == '<' and count < int(limitnum))
                or (sg == '>' and count > int(limitnum))):
            # Mail body: host prefix, rule text, then matches/lines-read.
            content = (ethip + k + ": " + str(count) + "/" + str(readnum)
                       + "\n")
            send_mail(mailto_list, "Monitor Warning!!!", content)
        else:
            print(" Normal monitoring service:" + logfile)
if __name__ == '__main__':
    # Every alert body starts with this host's address in brackets, taken
    # from the bond0 interface, so the affected server can be identified.
    ethip = "[" + get_ip_address('bond0') + "]\n" + content
    monitor_list(ethip)
posted on 2015-10-31 12:37
David1228 阅读(663)
评论(0) 编辑 收藏 所属分类:
动态语言 、
Python