背景:新入职一家公司,发现现场没有对 SR(StarRocks)集群进行监控,于是根据开发的需求编写了一个 Python 脚本。
脚本逻辑:抓取 SR 的 BE/FE/routine load 状态信息,判断是否触发告警;若满足告警条件,则发送告警信息到钉钉群,并 @ 对应的责任人。
# -*- coding: utf-8 -*-
"""
author: zjh
date: 2023-09-28
description: StarRocks cluster monitoring, including routine load, frontend and backend status. Warning messages are then sent to DingTalk.
"""
import base64
import hashlib
import hmac
import json
import time
import urllib
import urllib.parse

import pymysql
import requests
# --- DingTalk robot settings: adjust these for your environment -------------
# Webhook prefix; the robot's access token is appended to it.
DINGTALK_WEBHOOK = 'https://oapi.dingtalk.com/robot/send?access_token='
# Signing secret of the robot ("加签" security setting).
DINGTALK_SECRET = 'SECd0c0f14733789a******************645a4d1ae76ea3481b1384f5ef'
# DingTalk ids that get @-mentioned for each alert type.
ALERT_OWNERS = {
    'routineloadwarn': ["dfdfs134"],
    'fewarn': ["faasdfaAQE"],
    'bewarn': ["faasdfaAQE"],
}
# Seconds to wait for the DingTalk webhook before giving up.
DINGTALK_TIMEOUT = 10


def signed_webhook_url(token, timestamp=None, secret=DINGTALK_SECRET):
    """Return the robot webhook URL with the timestamp/sign parameters added.

    The signature is HMAC-SHA256 over ``"<timestamp>\\n<secret>"`` keyed with
    the secret, base64-encoded and then URL-quoted.

    Args:
        token: Access token of the DingTalk robot.
        timestamp: Millisecond timestamp as a string; defaults to "now".
        secret: Signing secret of the robot.

    Returns:
        The full URL to POST the message to.
    """
    if timestamp is None:
        timestamp = str(round(time.time() * 1000))
    string_to_sign = '{}\n{}'.format(timestamp, secret)
    digest = hmac.new(
        secret.encode('utf-8'),
        string_to_sign.encode('utf-8'),
        digestmod=hashlib.sha256,
    ).digest()
    sign = urllib.parse.quote_plus(base64.b64encode(digest))
    # The original text read "×tamp=" here: "&times" had been decoded as an
    # HTML entity, which dropped the "&timestamp" query parameter.
    return '{}{}&timestamp={}&sign={}'.format(
        DINGTALK_WEBHOOK, token, timestamp, sign)


def build_payload(warntype, content):
    """Build the JSON body of a DingTalk text message.

    Args:
        warntype: Alert type; selects the ids to @-mention from ALERT_OWNERS.
            Unknown types mention nobody instead of failing.
        content: Message text.

    Returns:
        A dict ready to be serialized with ``json.dumps``.
    """
    return {
        "at": {
            "atMobiles": ["180xxxxxx"],
            "atDingtalkIds": list(ALERT_OWNERS.get(warntype, ())),
            # A real boolean: the string "False" is not a JSON false.
            "isAtAll": False,
        },
        "msgtype": "text",
        "text": {"content": content},
    }


def msg(warntype, content, token):
    """Send one alert message to the DingTalk group.

    Args:
        warntype: 'routineloadwarn', 'fewarn' or 'bewarn'.
        content: Message text.
        token: Access token of the DingTalk robot.

    Raises:
        requests.RequestException: if the webhook cannot be reached within
            DINGTALK_TIMEOUT seconds.
    """
    headers = {'Content-Type': 'application/json;charset=utf-8'}
    requests.post(
        signed_webhook_url(token),
        data=json.dumps(build_payload(warntype, content)),
        headers=headers,
        timeout=DINGTALK_TIMEOUT,
    )


def srmsg(ip, port, username, passwd, db, exe_sql):
    """Run one SQL statement against a cluster and return all result rows.

    Args:
        ip: Host to connect to.
        port: Port to connect to.
        username: Login user.
        passwd: Login password.
        db: Database to select on connect.
        exe_sql: Statement to execute.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the statement.
    """
    conn = pymysql.connect(host=ip, port=port, user=username,
                           password=passwd, database=db)
    # Close cursor and connection even when the statement fails.
    try:
        with conn.cursor() as cursor:
            cursor.execute(exe_sql)
            return cursor.fetchall()
    finally:
        conn.close()


def format_node_alert(clustername, role, node, status, last_start):
    """Return the alert text for one unhealthy FE/BE node.

    ``str.format`` is used so that NULL or non-string columns are rendered
    instead of raising TypeError as ``+`` concatenation would.
    """
    return "集群: {}\n异常{}: {}\n状态: {}\n上次启动时间: {}".format(
        clustername, role, node, status, last_start)


def format_routine_load_alert(clustername, info):
    """Return the alert text for one routine load row.

    Args:
        clustername: Name shown in the alert.
        info: One result row; columns 0, 1, 2, 5, 6, 7, 16 and 17 are read.
    """
    return (
        "集群: {}\n任务id: {}\n任务名: {}\n库名: {}\n表名: {}\n"
        "状态: {}\n挂起时间: {}\n日志链接:{}\n其他错误:{}"
    ).format(clustername, info[0], info[1], info[5], info[6],
             info[7], info[2], info[16], info[17])


def fewarn(clustername, base_tuple_fe, token):
    """Alert for every frontend row whose status column equals "false".

    Args:
        clustername: Name shown in the alert; also selects the column layout.
        base_tuple_fe: Rows returned by the frontend status query.
        token: Access token of the DingTalk robot.
    """
    warntype = 'fewarn'
    # Column positions differ between cluster versions. Only a cluster named
    # exactly 'SR' uses the shifted layout; every other name uses the default.
    # NOTE(review): presumably tied to the StarRocks version -- confirm.
    if clustername == 'SR':
        status_idx, start_idx = 10, 15
    else:
        status_idx, start_idx = 9, 14
    for feinfo in base_tuple_fe:
        if feinfo[status_idx] == "false":
            text = format_node_alert(clustername, "FE", feinfo[1],
                                     feinfo[status_idx], feinfo[start_idx])
            msg(warntype, text, token)


def bewarn(clustername, base_tuple_be, token):
    """Alert for every backend row whose status column equals "false".

    Args:
        clustername: Name shown in the alert; also selects the column layout.
        base_tuple_be: Rows returned by the backend status query.
        token: Access token of the DingTalk robot.
    """
    warntype = 'bewarn'
    # Layout for a cluster named exactly 'SatrRocks' (commented as version
    # 2.4.1 in the original); every other name uses the 2.3.1 layout.
    if clustername == 'SatrRocks':
        node_idx, status_idx, start_idx = 2, 9, 7
    else:
        node_idx, status_idx, start_idx = 1, 8, 6
    for beinfo in base_tuple_be:
        if beinfo[status_idx] == "false":
            text = format_node_alert(clustername, "BE", beinfo[node_idx],
                                     beinfo[status_idx], beinfo[start_idx])
            msg(warntype, text, token)


def routineloadwarn(clustername, load_tuple, token):
    """Send one alert per routine load row returned by the check query.

    Args:
        clustername: Name shown in the alert.
        load_tuple: Rows returned by the routine load query; every row is
            reported, so the query itself must filter (e.g. State='PAUSED').
        token: Access token of the DingTalk robot.
    """
    warntype = 'routineloadwarn'
    for info in load_tuple:
        msg(warntype, format_routine_load_alert(clustername, info), token)


def check_cluster(env, token):
    """Run the routine load, FE and BE checks for one environment, in order.

    Args:
        env: Environment dict with the keys envname, ip, port, username,
            passwd, dbname (list; the first entry is used) and the three
            check_* SQL statements.
        token: Access token of the DingTalk robot.
    """
    def query(sql):
        return srmsg(env["ip"], env["port"], env["username"], env["passwd"],
                     env["dbname"][0], sql)

    routineloadwarn(env["envname"], query(env["check_rtload"]), token)
    fewarn(env["envname"], query(env["check_fe_status"]), token)
    bewarn(env["envname"], query(env["check_be_status"]), token)


def main():
    """Check every configured environment once."""
    dict_srprod = {
        "envname": "生产SatrRocks",
        "ip": "192.168.10.10",
        "port": 29030,
        "username": "root",
        "passwd": "********",
        "dbname": ["test1", "test2"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED' and TableName in ('t1','t2');",
    }
    dict_qwprod = {
        "envname": "生产(1)SatrRocks",
        "ip": "192.168.10.10",
        "port": 39030,
        "username": "root",
        "passwd": "********",
        "dbname": ["test0", "test"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED';",
    }
    dict_srsit = {
        "envname": "测试SatrRocks",
        "ip": "192.168.10.11",
        "port": 19030,
        "username": "root",
        "passwd": "********",
        "dbname": ["test1", "test2", "test3"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED' and TableName='t0';",
    }
    token = "xxxxx"
    # Environment 0 (production), 1 (production (1)) and 2 (test), in the
    # same order the original script checked them.
    for env in (dict_srprod, dict_qwprod, dict_srsit):
        check_cluster(env, token)


if __name__ == '__main__':
    main()
另外,钉钉的一些开发说明,请参阅钉钉官网
注册机器人链接:https://open.dingtalk.com/document/connector/alarm-subscription