2017-04-19 Zabbix监控docker
公司的测试环境很多机器都用了docker,由于有好几套测试环境,出了问题再进去检查很麻烦,所以就在zabbix中做了监控,这样能最快地发现问题,节约解决问题的时间。
废话不多说,下面的操作是参考了网上很多博客做的,大部分东西都是牛人们做过的,我在这里只是借鉴并留下自己的心得。
借鉴博客
http://dl528888.blog.51cto.com/2382721/1660844
1.1 Agent端安装
1.2 安装pip工具
由于zabbix-agent端上要安装一个python的脚本,所以用的到pip
wget "https://pypi.python.org/packages/source/p/pip/pip-1.5.4.tar.gz#md5=834b2904f92d46aaa333267fb1c922bb" --no-check-certificate
tar -xzvf pip-1.5.4.tar.gz
cd pip-1.5.4
python setup.py install
安装docker-py
pip install docker-py
1.3 监控脚本
脚本路径:mkdir -p /usr/local/zabbix/bin/
脚本权限:chmod 777 /usr/local/zabbix/bin/zabbix*
属主属组:chown -R zabbix:zabbix /usr/local/zabbix/bin/zabbix*
1.3.1 容器检查脚本
这个脚本用于获取容器名
[root@test4 bin]# cat /usr/local/zabbix/bin/zabbix_low_discovery.sh
#!/bin/bash
#Function: zabbix low-level discovery
#######################################
# Print Zabbix low-level discovery JSON listing every Docker container.
#
# Runs `sudo docker ps -a`, drops the header line (the one containing
# "CONTAINER ID") and takes the last whitespace-separated field of each
# remaining line as the container name. Each name is emitted as a
# {#CONTAINERNAME} macro entry inside a top-level "data" array.
#
# NOTE: this function is itself called `docker`. The listing still reaches
# the real binary because it is invoked through `sudo`, which does not see
# shell functions.
#
# Globals:   none
# Arguments: none
# Outputs:   the discovery JSON document on stdout
#######################################
docker() {
  local -a names
  local idx last

  # Word splitting is intentional here: one array element per name.
  # shellcheck disable=SC2207
  names=($(sudo docker ps -a | awk '!/CONTAINER ID/ {print $NF}'))
  last=$(( ${#names[@]} - 1 ))

  printf '{\n'
  printf '\t"data":[\n'
  for idx in "${!names[@]}"; do
    printf '\t {\n'
    # Names are passed as printf arguments, never as part of the format
    # string, so a stray '%' or '\' in the data cannot corrupt the output.
    if (( idx < last )); then
      # Every entry except the last is followed by a comma.
      printf '\t\t\t"{#CONTAINERNAME}":"%s"},\n' "${names[idx]}"
    else
      printf '\t\t\t"{#CONTAINERNAME}":"%s"}\n' "${names[idx]}"
    fi
  done
  printf '\t ]\n'
  printf '}\n'
}
#######################################
# Script entry point: dispatch on the first command-line argument.
#
# Arguments: $1 - discovery target; only "docker" is recognised
# Outputs:   whatever the docker function prints for "docker",
#            otherwise a one-line usage message on stdout
#######################################
main() {
  case "${1:-}" in
    docker)
      docker
      ;;
    *)
      echo "Usage:$(basename "$0") {docker}"
      ;;
  esac
}

main "$@"
- 检查脚本效果
[root@test4 bin]# sh zabbix_low_discovery.sh docker
{
"data":[
{
"{#CONTAINERNAME}":"docker112"},
{
"{#CONTAINERNAME}":"docker111"},
{
"{#CONTAINERNAME}":"docker110"},
{
"{#CONTAINERNAME}":"docker109"},
{
"{#CONTAINERNAME}":"docker108"},
{
"{#CONTAINERNAME}":"docker107"},
{
"{#CONTAINERNAME}":"docker106"},
{
"{#CONTAINERNAME}":"docker105"},
{
"{#CONTAINERNAME}":"docker104"},
{
"{#CONTAINERNAME}":"docker103"},
{
"{#CONTAINERNAME}":"docker102"},
{
"{#CONTAINERNAME}":"docker101"}
]
}
1.3.2 获取docker容器中信息的脚本
[root@test4 bin]# cat zabbix_monitor_docker.py
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import json
import os
import re
import subprocess
import sys
import time

from docker import Client
# Maps the network item names to the counter key they report.
_NETWORK_ITEMS = {'network_rx_bytes': 'rx', 'network_tx_bytes': 'tx'}


def _parse_stats_sample(raw):
    """Decode one element of the stats stream into a dict.

    Accepts JSON text (str or bytes); a value that is already a dict is
    returned unchanged.
    """
    if isinstance(raw, dict):
        return raw
    if isinstance(raw, bytes) and not isinstance(raw, str):
        raw = raw.decode('utf-8')
    return json.loads(raw)


def _parse_ifconfig_bytes(output):
    """Extract the RX/TX byte counters from `ifconfig` output.

    Returns a dict {'rx': int, 'tx': int}. Raises ValueError when a
    counter cannot be found in the text.
    """
    counters = {}
    for key, label in (('rx', 'RX'), ('tx', 'TX')):
        # Classic layout: "RX bytes:1234 (1.2 KiB)  TX bytes:5678 (5.5 KiB)"
        match = re.search(r'%s bytes:(\d+)' % label, output)
        if match is None:
            # NOTE(review): fallback for the newer layout
            # "RX packets 8  bytes 648 (648.0 B)" - confirm against the
            # ifconfig shipped in the monitored images.
            match = re.search(r'%s packets\s+\d+\s+bytes\s+(\d+)' % label, output)
        if match is None:
            raise ValueError('no %s byte counter in ifconfig output' % label)
        counters[key] = int(match.group(1))
    return counters


def _read_network_counters(container_name):
    """Run `docker exec <container> ifconfig eth0` and parse its counters.

    The command is passed as an argument list (no shell), so the container
    name is never interpreted by a shell.
    """
    process = subprocess.Popen(
        ['docker', 'exec', container_name, 'ifconfig', 'eth0'],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    output = process.communicate()[0]
    return _parse_ifconfig_bytes(output)


def _cpu_count(cpu_stats):
    """Number of CPUs used to scale the CPU percentage."""
    per_cpu = cpu_stats['cpu_usage'].get('percpu_usage')
    if per_cpu:
        return len(per_cpu)
    # NOTE(review): `online_cpus` is assumed to be present when
    # `percpu_usage` is not; falls back to 1 if neither is available.
    return cpu_stats.get('online_cpus') or 1


def check_container_stats(container_name, collect_item, client=None):
    """Return one metric for a container.

    Parameters:
        container_name: name of the container to inspect.
        collect_item: which metric to compute, one of
            'cpu_total_usage'   - delta of total CPU usage between two samples
            'cpu_system_uasge'  - delta of system CPU usage between two samples
                                  (historical spelling; 'cpu_system_usage'
                                  is accepted as well)
            'cpu_percent'       - container delta / system delta * CPUs * 100,
                                  rounded to 2 decimals
            'mem_usage'         - memory usage from the newer sample
            'mem_limit'         - memory limit from the newer sample
            'mem_percent'       - usage / limit * 100, rounded to 2 decimals
            'network_rx_bytes'  - eth0 RX byte delta over a 1 second sleep
            'network_tx_bytes'  - eth0 TX byte delta over a 1 second sleep
        client: object exposing `stats(container_name)` that yields JSON
            samples; defaults to the module-level `docker_client`.

    Returns:
        The metric value; 0 for an unrecognised `collect_item`. Percentages
        are 0.0 when their denominator is zero.

    Side effects:
        Stores the returned value in the module-level name `result`.
    """
    global result
    result = 0

    if collect_item in _NETWORK_ITEMS:
        # Network counters come from inside the container, so the stats
        # stream is not needed for these items.
        counter = _NETWORK_ITEMS[collect_item]
        before = _read_network_counters(container_name)
        time.sleep(1)
        after = _read_network_counters(container_name)
        result = after[counter] - before[counter]
        return result

    if client is None:
        client = docker_client
    stream = client.stats(container_name)
    try:
        # Two consecutive samples are needed to compute CPU deltas.
        old_sample = _parse_stats_sample(next(stream))
        new_sample = _parse_stats_sample(next(stream))
    finally:
        stream.close()

    if collect_item == 'cpu_total_usage':
        result = (new_sample['cpu_stats']['cpu_usage']['total_usage']
                  - old_sample['cpu_stats']['cpu_usage']['total_usage'])
    elif collect_item in ('cpu_system_uasge', 'cpu_system_usage'):
        result = (new_sample['cpu_stats']['system_cpu_usage']
                  - old_sample['cpu_stats']['system_cpu_usage'])
    elif collect_item == 'cpu_percent':
        container_delta = (new_sample['cpu_stats']['cpu_usage']['total_usage']
                           - old_sample['cpu_stats']['cpu_usage']['total_usage'])
        system_delta = (new_sample['cpu_stats']['system_cpu_usage']
                        - old_sample['cpu_stats']['system_cpu_usage'])
        if system_delta > 0:
            cpu_num = _cpu_count(old_sample['cpu_stats'])
            result = round(
                (float(container_delta) / float(system_delta)) * cpu_num * 100.0, 2)
        else:
            # Identical samples would otherwise divide by zero.
            result = 0.0
    elif collect_item == 'mem_usage':
        result = new_sample['memory_stats']['usage']
    elif collect_item == 'mem_limit':
        result = new_sample['memory_stats']['limit']
    elif collect_item == 'mem_percent':
        mem_usage = new_sample['memory_stats']['usage']
        mem_limit = new_sample['memory_stats']['limit']
        if mem_limit:
            result = round(float(mem_usage) / float(mem_limit) * 100.0, 2)
        else:
            result = 0.0
    return result
if __name__ == "__main__":
    # Command-line entry point: <script> <container_name> <collect_item>.
    # Prints the requested metric on stdout.

    # Binds the bare name `null` at module level so that any eval()-based
    # parsing of JSON text in this module can resolve JSON null; harmless
    # otherwise.
    null = 'null'

    if len(sys.argv) < 3:
        sys.stderr.write(
            'Usage: %s <container_name> <collect_item>\n' % sys.argv[0])
        sys.exit(1)

    # Module-level client read by check_container_stats.
    docker_client = Client(base_url='unix://var/run/docker.sock', version='1.17')
    container_name = sys.argv[1]
    collect_item = sys.argv[2]
    # print() with a single argument behaves the same on Python 2 and 3.
    print(check_container_stats(container_name, collect_item))
1.4 修改配置文件
定义key
在/etc/zabbix/zabbix_agentd.conf
下添加下面两行
UserParameter=zabbix_low_discovery[*],/bin/bash /usr/local/zabbix/bin/zabbix_low_discovery.sh $1
UserParameter=docker_stats[*],sudo /usr/local/zabbix/bin/zabbix_monitor_docker.py $1 $2
重启agent
/etc/init.d/zabbix-agent restart
1.5 修改权限使zabbix用户能无密码访问docker
echo "zabbix ALL=NOPASSWD: ALL">>/etc/sudoers
然后将 /etc/sudoers 的第55行(行号可能因系统而异)修改为:
Defaults visiblepw
1.6 添加模板
Docker监控到此结束 下面是效果图
有不明白的可以加qq群讨论:573241437