
# -*- coding: utf-8 -*-
# @Author: Mehaei
# @Date:   2019-08-27 16:57:58
# @Last Modified by:   Mehaei
# @Last Modified time: 2019-08-30 17:00:04
"""Periodically check CPU, memory and system load; email on threshold breach."""
import os
import sys
import time

# Make project modules importable no matter which directory the script is
# launched from: switch to the script's own directory and put its parent on
# the import path.
work_dir, file_name = os.path.split(__file__)
os.chdir(work_dir if work_dir else "./")
sys.path.append("../")

# Main modules used by the monitor.
import psutil
from submit_data.to_email import ToEmail

# Project name; used in the subject of the notification email.
PROJECT_NAME = "Test"

# Interval between two checks, in seconds (every 10 minutes).
_CHECK_TIME_INTERVAL = 60 * 10

# Memory usage threshold (percent).
_MEMORY_NORMAL = 90

# Load-average threshold; equals the number of CPU cores.
_LOADAVG_NORMAL = psutil.cpu_count()

# CPU usage threshold (percent).
_CPU_NORMAL = 90

# Paths whose disk usage is checked.
_DISK_MONITOR_LIST = ["/home"]

# Disk usage threshold (percent).
_DISK_NORMAL = 90

# Crawler processes are identified by the file name of the spider script ...
_CRAWL_SPIDER_FILE = ["spider.py"]

# ... or by the exact command used to launch the crawler.
_KILL_PROCESS_COMMAND = ["python3 ./spider.py"]

CPU = "cpu"
MEMORY = "memory"
DISK = "disk"
SYS_LOAD = "sys_load"

NORMAL = "Normal"

# Resources to monitor.
MONITOR_LIST = [CPU, MEMORY, SYS_LOAD]

# "inform" or "kill".
# "inform" sends an email; "kill" is meant to kill the crawler processes.
EXCEPTION_HANDLING_METHOD = "inform"

# While an exception persists, no further email is sent within this many
# seconds of the previous one.
SEND_EMAIL_INTERVAL = 60 * 60


class ResourceMonitor(object):
    """Poll hardware usage forever and react when a threshold is exceeded.

    Constructing an instance starts the polling loop immediately, so the
    constructor does not return.
    """

    def __init__(self):
        self._keep_check()

    def _keep_check(self) -> None:
        """Run the check / notify loop; never returns.

        Each cycle collects the status of every monitored resource. If any of
        them is not ``NORMAL`` and the handling method is ``"inform"``, one
        email listing all statuses is sent, at most once per
        ``SEND_EMAIL_INTERVAL`` seconds.
        """
        last_sent = 0
        while True:
            check_result = self.check_hardware_status()
            abnormal = any(
                status != NORMAL for status in check_result.values()
            )
            if abnormal and EXCEPTION_HANDLING_METHOD == "inform":
                # Throttle: stay silent while the previous mail is recent.
                if time.time() - last_sent >= SEND_EMAIL_INTERVAL:
                    ToEmail(
                        SUBJECT="%s ResourceMonitor Exception" % PROJECT_NAME
                    ).send(["%s: %s" % (h, s) for h, s in check_result.items()])
                    last_sent = time.time()
            # NOTE: "kill" mode is intentionally a no-op for now; call
            # self.kill_crawl_process() here to enable it.
            time.sleep(_CHECK_TIME_INTERVAL)

    def check_hardware_status(self) -> dict:
        """Return a status string for every resource in ``MONITOR_LIST``.

        The result maps the resource name to ``NORMAL`` or to an error
        message containing the measured value. Only ``MEMORY``, ``CPU`` and
        ``SYS_LOAD`` are evaluated here; ``DISK`` is not.
        """
        result = {}
        error_msg = "%s exception, usage rate: %s"

        if MEMORY in MONITOR_LIST:
            memory_usage_rate = self.memory_monitor()
            if memory_usage_rate > _MEMORY_NORMAL:
                result[MEMORY] = error_msg % (MEMORY, memory_usage_rate)
            else:
                result[MEMORY] = NORMAL

        if CPU in MONITOR_LIST:
            cpu_usage_rate = self.cpu_monitor()
            if cpu_usage_rate > _CPU_NORMAL:
                result[CPU] = error_msg % (CPU, cpu_usage_rate)
            else:
                result[CPU] = NORMAL

        if SYS_LOAD in MONITOR_LIST:
            # Only the first load-average figure is compared to the threshold.
            loadavg = self.loadavg_monitor()
            if loadavg[0] > _LOADAVG_NORMAL:
                result[SYS_LOAD] = error_msg % (SYS_LOAD, loadavg[0])
            else:
                result[SYS_LOAD] = NORMAL

        return result

    # Memory monitoring
    def memory_monitor(self) -> float:
        """Return the virtual-memory usage as a percentage."""
        return psutil.virtual_memory().percent

    # CPU monitoring
    def cpu_monitor(self) -> float:
        """Return the overall CPU usage as a percentage."""
        # A short blocking interval is used; per the original author this
        # avoids readings of 0 or 100.0 when the script has just started.
        return psutil.cpu_percent(interval=0.01, percpu=False)

    # Disk monitoring
    def disk_monitor(self) -> dict:
        """Return ``{path: "Normal" | "Error"}`` for ``_DISK_MONITOR_LIST``.

        A path is "Normal" while its usage is below ``_DISK_NORMAL``.
        """
        check_result = {}
        for path in _DISK_MONITOR_LIST:
            if self.disk_status(path) < _DISK_NORMAL:
                check_result[path] = NORMAL
            else:
                check_result[path] = "Error"
        return check_result

    def disk_status(self, path: str) -> float:
        """Return the disk usage percentage reported for ``path``.

        ``path`` is the location to check, for example ``/amazon``.
        """
        return psutil.disk_usage(path).percent

    # Load monitoring
    def loadavg_monitor(self) -> tuple:
        """Return the system load averages as reported by psutil."""
        return psutil.getloadavg()

    @staticmethod
    def _is_crawl_command(cmdline: list) -> bool:
        """Return True when ``cmdline`` identifies a crawler process.

        A process matches when its joined command line is listed in
        ``_KILL_PROCESS_COMMAND``, or when it is exactly
        ``python3 <path>`` and the file name of ``<path>`` is listed in
        ``_CRAWL_SPIDER_FILE``.
        """
        if " ".join(cmdline) in _KILL_PROCESS_COMMAND:
            return True
        # Short-circuit so an empty command line is never indexed.
        return (
            len(cmdline) == 2
            and cmdline[0] == "python3"
            and cmdline[-1].split("/")[-1] in _CRAWL_SPIDER_FILE
        )

    # Kill the crawler
    def kill_crawl_process(self) -> None:
        """Find every crawler process and ask it to terminate."""
        for pid in psutil.pids():
            try:
                proc = psutil.Process(pid)
                if self._is_crawl_command(proc.cmdline()):
                    # Terminate the process itself (the command line is
                    # just a list of strings and cannot be terminated).
                    proc.terminate()
            except psutil.Error:
                # Best effort: the process may have exited since pids() was
                # called, or may not be accessible to this user.
                continue


if __name__ == "__main__":
    # Start monitoring.
    ResourceMonitor()