RabbitMQ-基于Python的监控脚本


监控脚本:

[root@node10 chapter-10]# python nagios_check.py critical

Status is CRITICAL


[root@node10 chapter-10]# cat nagios_check.py
import sys,json,httplib,base64
# Nagios plugin results keyed by the lower-cased status keyword, stored as
# (exit_code, message). "unknow" was the spelling the script originally
# matched; it is kept as an alias so existing invocations behave the same,
# while the correctly spelled "unknown" no longer falls through to OK.
STATUS_TABLE = {
    "warning": (1, "Status is WARN"),
    "critical": (2, "Status is CRITICAL"),
    "unknown": (3, "Status is UNKNOWN"),
    "unknow": (3, "Status is UNKNOWN"),
}


def resolve_status(status):
    """Map a status keyword to its (exit_code, message) pair.

    Matching is case-insensitive. Any keyword that is not in STATUS_TABLE is
    reported as OK with exit code 0.
    """
    return STATUS_TABLE.get(status.lower(), (0, "Status is OK"))


def main(argv):
    """Print the status line selected by argv[1] and return the exit code.

    When no status argument is given, a usage line is printed and 3 (UNKNOWN)
    is returned instead of failing with an IndexError traceback.
    """
    if len(argv) < 2:
        print("UNKNOWN: usage: %s <warning|critical|unknown|ok>" % argv[0])
        return 3
    code, message = resolve_status(argv[1])
    print(message)
    return code


if __name__ == "__main__":
    sys.exit(main(sys.argv))

使用AMQP模拟检测来确认MQ是否运行

测试:
[root@node10 chapter-10]# python amqp_ping_check.py localhost:5672 / admin admin
OK: Connect to localhost:5672 successfull.


[root@node10 chapter-10]# python amqp_ping_check.py localhost:5672 / admin admin
CRITICAL:Could not connect to localhost:5672!




代码:
[root@node10 chapter-10]# cat amqp_ping_check.py
import sys,pika
# Nagios plugin exit codes.
EXIT_OK = 0
EXIT_WARNING = 1
EXIT_CRITICAL = 2
EXIT_UNKNOWN = 3


def split_endpoint(endpoint):
    """Split "host:port" into (host, port), with port converted to int.

    Raises ValueError when the host or port part is missing, or when the
    port is not a number.
    """
    host, sep, port = endpoint.rpartition(":")
    if not sep or not host:
        raise ValueError("expected host:port, got %r" % endpoint)
    return host, int(port)


def main(argv):
    """Open an AMQP connection and channel; return the Nagios exit code.

    argv: [prog, "host:port", vhost, username, password]
    Returns EXIT_OK when the connection and channel open, EXIT_CRITICAL when
    they fail, and EXIT_UNKNOWN when the arguments are unusable.
    """
    if len(argv) < 5:
        print("UNKNOWN: usage: %s host:port vhost username password" % argv[0])
        return EXIT_UNKNOWN
    try:
        server, port = split_endpoint(argv[1])
    except ValueError:
        print("UNKNOWN: Invalid host:port argument: %s" % argv[1])
        return EXIT_UNKNOWN
    vhost, username, password = argv[2], argv[3], argv[4]

    creds_broker = pika.PlainCredentials(username, password)
    # The port must be handed to pika explicitly. Previously it was only used
    # in the printed messages, so the check could report success for a port
    # it never actually contacted.
    conn_params = pika.ConnectionParameters(
        server, port=port, virtual_host=vhost, credentials=creds_broker)

    try:
        conn_broker = pika.BlockingConnection(conn_params)
        try:
            conn_broker.channel()
        finally:
            # Do not leave the probe connection open on the broker.
            conn_broker.close()
    except Exception:
        print("CRITICAL:Could not connect to %s:%s!" % (server, port))
        return EXIT_CRITICAL
    print("OK: Connect to %s:%s successfull." % (server, port))
    return EXIT_OK


if __name__ == "__main__":
    sys.exit(main(sys.argv))


通过REST API来检测MQ

测试:
[root@node10 chapter-10]# python api_ping_check.py localhost:15672 / admin admin
OK: Broker alive: {"status":"ok"}


[root@node10 chapter-10]# cat api_ping_check.py
import sys,json,httplib,urllib,base64,socket


# Nagios plugin exit codes.
EXIT_OK = 0
EXIT_WARNING = 1
EXIT_CRITICAL = 2
EXIT_UNKNOWN = 3

# Seconds to wait for the management API. Without a timeout the check can
# block indefinitely on a broker that accepts the connection but never answers.
HTTP_TIMEOUT = 10


def split_endpoint(endpoint):
    """Split "host:port" into (host, port), with port converted to int.

    Raises ValueError when the host or port part is missing, or when the
    port is not a number.
    """
    host, sep, port = endpoint.rpartition(":")
    if not sep or not host:
        raise ValueError("expected host:port, got %r" % endpoint)
    return host, int(port)


def main(argv):
    """Call /api/aliveness-test/<vhost> and return the Nagios exit code.

    argv: [prog, "host:port", vhost, username, password]
    Returns EXIT_OK for an HTTP status up to 299, EXIT_CRITICAL for a higher
    status or a connection failure, and EXIT_UNKNOWN for unusable arguments.
    """
    if len(argv) < 5:
        print("UNKNOWN: usage: %s host:port vhost username password" % argv[0])
        return EXIT_UNKNOWN
    try:
        server, port = split_endpoint(argv[1])
    except ValueError:
        print("UNKNOWN: Invalid host:port argument: %s" % argv[1])
        return EXIT_UNKNOWN
    vhost, username, password = argv[2], argv[3], argv[4]

    # safe="" so that the default vhost "/" is encoded as %2F in the path.
    path = "/api/aliveness-test/%s" % urllib.quote(vhost, safe="")
    credentials = base64.b64encode("%s:%s" % (username, password))
    headers = {"Content-Type": "application/json",
               "Authorization": "Basic " + credentials}

    conn = httplib.HTTPConnection(server, port, timeout=HTTP_TIMEOUT)
    try:
        # Reading the response can fail just like sending the request, so
        # both are covered by the same handler.
        conn.request("GET", path, "", headers)
        response = conn.getresponse()
        status = response.status
        body = response.read()
    except (socket.error, httplib.HTTPException):
        print("CRITICAL: Could not connect to %s:%s" % (server, port))
        return EXIT_CRITICAL
    finally:
        conn.close()

    if status > 299:
        print("CRITICAL: Broker not alive: %s" % body)
        return EXIT_CRITICAL
    print("OK: Broker alive: %s" % body)
    return EXIT_OK


if __name__ == "__main__":
    sys.exit(main(sys.argv))






# NOTE(review): everything from here to the final exit(EXIT_OK) repeats the
# body of the amqp_ping_check.py listing word for word. In this listing it
# comes after an unconditional exit(EXIT_OK), so it looks like a paste
# leftover that can never run -- confirm and drop it. It also uses pika,
# which the api_ping_check.py import line does not bring in.

# Read "host:port", vhost and the credentials from the command line.
server,port = sys.argv[1].split(":")
vhost = sys.argv[2]
username=sys.argv[3]
password = sys.argv[4]


# Build the pika credentials and connection parameters. `port` is not passed
# here; it is only used in the messages printed below.
creds_broker = pika.PlainCredentials(username,password)
conn_params = pika.ConnectionParameters(server,virtual_host=vhost,credentials=creds_broker)


# Open a blocking connection and a channel; any exception is reported as
# CRITICAL and ends the script with EXIT_CRITICAL.
try:
        conn_broker = pika.BlockingConnection(conn_params)
        channel = conn_broker.channel()
except Exception:
        print "CRITICAL:Could not connect to %s:%s!" %(server,port)
        exit(EXIT_CRITICAL)
print "OK: Connect to %s:%s successfull." %(server,port)
exit(EXIT_OK)

监控队列配置是否被修改

演示:
[root@node10 chapter-10]# python api_config_file_modify_check.py localhost:15672 / admin admin backup_orders true true
WARN:Queue 'backup_orders' - auto_delete flag is NOT True.
[root@node10 chapter-10]# python api_config_file_modify_check.py localhost:15672 / admin admin backup_orders false  true
OK: Queue backup_orders configured correctly.
[root@node10 chapter-10]# python api_config_file_modify_check.py localhost:15672 / admin admin backup_orders false  false
WARN:Queue 'backup_orders' - durable flag is NOT False.




代码:
[root@node10 chapter-10]# cat api_config_file_modify_check.py
import sys,json,httplib,urllib,base64,socket


# Nagios plugin exit codes.
EXIT_OK = 0
EXIT_WARNING = 1
EXIT_CRITICAL = 2
EXIT_UNKNOWN = 3

# Seconds to wait for the management API. Without a timeout the check can
# block indefinitely on a broker that accepts the connection but never answers.
HTTP_TIMEOUT = 10


def split_endpoint(endpoint):
    """Split "host:port" into (host, port), with port converted to int.

    Raises ValueError when the host or port part is missing, or when the
    port is not a number.
    """
    host, sep, port = endpoint.rpartition(":")
    if not sep or not host:
        raise ValueError("expected host:port, got %r" % endpoint)
    return host, int(port)


def parse_flag(text):
    """Decode a command-line flag such as "true"/"False" via JSON.

    The text is lower-cased first so any capitalisation is accepted.
    Raises ValueError when the text is not valid JSON.
    """
    return json.loads(text.lower())


def check_queue_flags(queue_info, queue_name, auto_delete, durable):
    """Compare a queue's reported flags with the expected ones.

    queue_info is the decoded JSON object returned for the queue. auto_delete
    is checked before durable, and the first mismatch wins.
    Returns (exit_code, message).
    """
    for flag, expected in (("auto_delete", auto_delete), ("durable", durable)):
        if queue_info[flag] != expected:
            return EXIT_WARNING, "WARN:Queue '%s' - %s flag is NOT %s." % (
                queue_name, flag, expected)
    return EXIT_OK, "OK: Queue %s configured correctly." % queue_name


def main(argv):
    """Fetch /api/queues/<vhost>/<queue> and verify its flags.

    argv: [prog, "host:port", vhost, username, password, queue_name,
           auto_delete, durable]
    Returns the Nagios exit code: OK when both flags match, WARNING on a
    mismatch, CRITICAL when the queue does not exist (HTTP 404), UNKNOWN for
    connection/API errors or unusable arguments.
    """
    if len(argv) < 8:
        print("UNKNOWN: usage: %s host:port vhost username password "
              "queue_name auto_delete durable" % argv[0])
        return EXIT_UNKNOWN
    try:
        server, port = split_endpoint(argv[1])
        auto_delete = parse_flag(argv[6])
        durable = parse_flag(argv[7])
    except ValueError:
        print("UNKNOWN: Invalid arguments: %s" % " ".join(argv[1:]))
        return EXIT_UNKNOWN
    vhost, username, password, queue_name = argv[2:6]

    # Both path segments are quoted with safe="" so that a "/" inside the
    # vhost or the queue name is encoded instead of splitting the URL path.
    path = "/api/queues/%s/%s" % (urllib.quote(vhost, safe=""),
                                  urllib.quote(queue_name, safe=""))
    credentials = base64.b64encode("%s:%s" % (username, password))
    headers = {"Content-Type": "application/json",
               "Authorization": "Basic " + credentials}

    conn = httplib.HTTPConnection(server, port, timeout=HTTP_TIMEOUT)
    try:
        # Reading the response can fail just like sending the request, so
        # both are covered by the same handler.
        conn.request("GET", path, "", headers)
        response = conn.getresponse()
        status = response.status
        body = response.read()
    except (socket.error, httplib.HTTPException):
        print("UNKNOWN: Could not connect to %s:%s" % (server, port))
        return EXIT_UNKNOWN
    finally:
        conn.close()

    if status == 404:
        print("CRITICAL: Queue %s does not exits." % queue_name)
        return EXIT_CRITICAL
    if status > 299:
        print("UNKNOWN: Unexpected API error: %s" % body)
        return EXIT_UNKNOWN
    try:
        queue_info = json.loads(body)
    except ValueError:
        print("UNKNOWN: Unexpected API error: %s" % body)
        return EXIT_UNKNOWN

    code, message = check_queue_flags(queue_info, queue_name, auto_delete, durable)
    print(message)
    return code


if __name__ == "__main__":
    sys.exit(main(sys.argv))



监控集群状态

演示:
手动查看:
[root@node10 chapter-10]# curl -i -u admin:admin http://localhost:15672/api/nodes


通过api接口:
[root@node10 chapter-10]# python cluster_health_check.py localhost:15672 admin admin rabbit@node6,rabbit@node10 340000 3300000
CRITICAL:Node rabbit@node10 memory usage is 31515144.
[root@node10 chapter-10]# python cluster_health_check.py localhost:15672 admin admin rabbit@node6,rabbit@node10 34000000 335845520
OK:2 node. All memory usage below 335845520.
[root@node10 chapter-10]# python cluster_health_check.py localhost:15672 admin admin rabbit@node6,rabbit@node10 34000000 335845520
UNKNOWN: Could not connect to localhost:15672
[root@node10 chapter-10]# python cluster_health_check.py localhost:15672 admin admin rabbit@node6,rabbit@node10 34000000 335845520
WARNING:Cluster missing nodes: ['rabbit@node6']




代码:
[root@node10 chapter-10]# cat cluster_health_check.py
import sys,json,httplib,urllib,base64,socket
# Nagios plugin exit codes.
EXIT_OK = 0
EXIT_WARNING = 1
EXIT_CRITICAL = 2
EXIT_UNKNOWN = 3

# Seconds to wait for the management API. Without a timeout the check can
# block indefinitely on a broker that accepts the connection but never answers.
HTTP_TIMEOUT = 10


def split_endpoint(endpoint):
    """Split "host:port" into (host, port), with port converted to int.

    Raises ValueError when the host or port part is missing, or when the
    port is not a number.
    """
    host, sep, port = endpoint.rpartition(":")
    if not sep or not host:
        raise ValueError("expected host:port, got %r" % endpoint)
    return host, int(port)


def evaluate_cluster(nodes, expected_nodes, mem_critical, mem_warning):
    """Decide the cluster state from the decoded /api/nodes reply.

    nodes          -- list of node dicts (keys used: name, running, mem_used)
    expected_nodes -- node names that must be present and running
    mem_critical   -- mem_used above this value is CRITICAL
    mem_warning    -- mem_used above this value is WARNING
    Returns (exit_code, message).
    """
    # A node counts as running unless it is explicitly flagged running=False.
    running = [node for node in nodes if node.get("running") != False]
    running_names = set(node["name"] for node in running)
    missing = [name for name in expected_nodes if name not in running_names]
    if missing:
        return EXIT_WARNING, "WARNING:Cluster missing nodes: %s" % str(missing)

    # Only nodes that actually report mem_used can be compared. An entry
    # without that key (presumably a stopped node -- confirm against the
    # management API) would otherwise raise KeyError.
    measured = [node for node in running if "mem_used" in node]

    # Scan every node for the critical threshold before looking at warnings,
    # so one node over the warning level cannot hide another node that is
    # already over the critical level.
    for node in measured:
        if node["mem_used"] > mem_critical:
            return EXIT_CRITICAL, "CRITICAL:Node %s memory usage is %d." % (
                node["name"], node["mem_used"])
    for node in measured:
        if node["mem_used"] > mem_warning:
            return EXIT_WARNING, "WARNING: Node %s memory usage is %d." % (
                node["name"], node["mem_used"])
    return EXIT_OK, "OK:%d node. All memory usage below %d." % (
        len(nodes), mem_warning)


def main(argv):
    """Query /api/nodes and report the cluster health.

    argv: [prog, "host:port", username, password, "node1,node2,...",
           mem_critical, mem_warning]
    Returns the Nagios exit code chosen by evaluate_cluster, or EXIT_UNKNOWN
    for connection/API errors or unusable arguments.
    """
    if len(argv) < 7:
        print("UNKNOWN: usage: %s host:port username password node1,node2 "
              "mem_critical mem_warning" % argv[0])
        return EXIT_UNKNOWN
    try:
        server, port = split_endpoint(argv[1])
        mem_critical = int(argv[5])
        mem_warning = int(argv[6])
    except ValueError:
        print("UNKNOWN: Invalid arguments: %s" % " ".join(argv[1:]))
        return EXIT_UNKNOWN
    username, password = argv[2], argv[3]
    node_list = argv[4].split(",")

    credentials = base64.b64encode("%s:%s" % (username, password))
    headers = {"Content-Type": "application/json",
               "Authorization": "Basic " + credentials}

    conn = httplib.HTTPConnection(server, port, timeout=HTTP_TIMEOUT)
    try:
        # Reading the response can fail just like sending the request, so
        # both are covered by the same handler.
        conn.request("GET", "/api/nodes", "", headers)
        response = conn.getresponse()
        status = response.status
        body = response.read()
    except (socket.error, httplib.HTTPException):
        print("UNKNOWN: Could not connect to %s:%s" % (server, port))
        return EXIT_UNKNOWN
    finally:
        conn.close()

    if status > 299:
        print("UNKNOWN: Unexpected API error: %s" % body)
        return EXIT_UNKNOWN
    try:
        nodes = json.loads(body)
    except ValueError:
        print("UNKNOWN: Unexpected API error: %s" % body)
        return EXIT_UNKNOWN

    code, message = evaluate_cluster(nodes, node_list, mem_critical, mem_warning)
    print(message)
    return code


if __name__ == "__main__":
    sys.exit(main(sys.argv))


分割线
感谢打赏
江西数库信息技术有限公司
YWSOS.COM 平台代运维解决方案
 评论
 发表评论
姓   名:

Powered by AKCMS