##================================================================##

先来个笑话:

刚交的女朋友让我去他家玩,我觉得空手不好,于是告诉她我买点东西就过去,谁知道她突然来一句:不用买,家里还有一盒!

##================================================================##

昨天在折腾一台磁盘出现性能问题的MySQL时,根据监控去慢日志里找,发现短短10分钟有好几万行慢日志,由于对慢日志结构不熟悉,打开一个600+MB的文件拖着滚动条看,好多慢SQL一条SQL就有几百行,看来看去都没找到问题最根本原因,总算功夫不负有心人,总算在不经意间找到,要不然让我在无知中慢慢老去???

问题SQL如下:

造成的影响:

怀疑造成问题的原因:

由于子查询中没有任何过滤条件,进行全表扫描,而服务器内存不够,导致扫描出来的数据有写到临时文件中,同时服务器上还有其他业务运行,服务器无法及时处理完所有请求,造成积压,而积压请求又增加服务器压力,最终导致相互影响恶性循环。

也不知道研发为啥蛋疼地这么套一下,反正Mysql语句优化处理能力这么差,大家尽可能简化SQL吧

##===================================================##

言归正传,由于这一个SQL执行频率较低,使用pt工具分析不能准确定位到该SQL,而使用肉眼看又比较蛋疼,因此写个小工具来处理,同时还可以解决慢日志较大阅读不便的问题。

python脚本如下:

# coding: utf-8
import datetime
from optparse import OptionParser
MAX_QUERY_TIME_SECONDS = 10
START_DATETIME = datetime.datetime.min
END_DATETIME = datetime.datetime.max
SLOW_LOG_PATH = ""
class EventInfo(object):
    def __init__(self):
        self.exec_time = None
        self.login_user = ""
        self.query_time_seconds = 0
        self.lock_time_seconds = 0
        self.rows_sent = 0
        self.rows_examined = 0
        self.command_text = ""
        self.file_line_num = 0
def get_datetime_by_time_line(time_line):
    if time_line.startswith("# Time:"):
        time_str = "20" + time_line.replace("# Time:", "").replace("  ", " ").strip()
        return datetime.datetime.strptime(time_str, "%Y%m%d %H:%M:%S")
    else:
        return None
def set_query_info(event_info, query_info_line):
    line = query_info_line
    if line.startswith("# Query_time:"):
        tmp_line = line.replace("#", "").replace(": ", ":").replace("  ", " ").replace("  ", " ")
        tmp_items = tmp_line.split(" ")
        for tmp_item in tmp_items:
            # print(tmp_item)
            if tmp_item.strip().startswith("Query_time:"):
                event_info.query_time_seconds = float(tmp_item.replace("Query_time:", ""))
            if tmp_item.strip().startswith("Lock_time:"):
                event_info.lock_time_seconds = float(tmp_item.replace("Lock_time:", ""))
            if tmp_item.strip().startswith("Rows_sent:"):
                event_info.rows_sent = float(tmp_item.replace("Rows_sent:", ""))
            if tmp_item.strip().startswith("Rows_examined:"):
                event_info.rows_examined = float(tmp_item.replace("Rows_examined:", ""))
def set_login_user(event_info, login_user_line):
    line = login_user_line
    if line.startswith("# User@Host:"):
        event_info.login_user = line.replace("# User@Host:", "").strip()
def set_command_text(event_info, command_text_line):
    line = command_text_line
    event_info.command_text += chr(13) + line
def print_event_info(event_info):
    print_info = """
file_line_num:      {7}
Time:               {0}
User@Host:          {1}
Query_time:         {2:.5f} seconds
Lock_time:          {3:.5f} seconds
Rows_sent:          {4}
Rows_examined:      {5}
command_text:
""".format(event_info.exec_time,
           event_info.login_user,
           event_info.query_time_seconds,
           event_info.lock_time_seconds,
           event_info.rows_sent,
           event_info.rows_examined,
           event_info.command_text,
           event_info.file_line_num)
    print("*" * 70)
    print(print_info)
def get_user_paras():
    try:
        opt = OptionParser()
        opt.add_option('--slow_log',
                       dest='slow_log',
                       type=str,
                       help='the file path of the slow log ')
        opt.add_option('--long_query_seconds',
                       dest='long_query_seconds',
                       type=float,
                       help='long query seconds')
        opt.add_option('--start_datetime',
                       dest="start_datetime",
                       type=str,
                       help="start datetime, format is %Y-%m-%d %H:%M:%S")
        opt.add_option('--end_datetime',
                       dest="end_datetime",
                       type=str,
                       help="end datetime,  format is %Y-%m-%d %H:%M:%S")
        (options, args) = opt.parse_args()
        error_messages = []
        slow_log = options.slow_log
        start_datetime = options.start_datetime
        long_query_seconds = options.long_query_seconds
        end_datetime = options.end_datetime
        if not slow_log:
            error_messages.append("slow_log must be set;")
        else:
            global SLOW_LOG_PATH
            SLOW_LOG_PATH = slow_log
        if not long_query_seconds:
            error_messages.append("long_query_seconds must be set;")
        else:
            global MAX_QUERY_TIME_SECONDS
            MAX_QUERY_TIME_SECONDS = long_query_seconds
        try:
            if start_datetime is not None:
                tmp_start_datetime = datetime.datetime.strptime(start_datetime, "%Y-%m-%d %H:%M:%S")
                global START_DATETIME
                START_DATETIME = tmp_start_datetime
        except:
            error_messages.append("the format of start_datetime was wrong ;")
        try:
            if end_datetime is not None:
                tmp_end_datetime = datetime.datetime.strptime(end_datetime, "%Y-%m-%d %H:%M:%S")
                global END_DATETIME
                END_DATETIME = tmp_end_datetime
        except:
            error_messages.append("the format of end_datetime was wrong ;")
        if len(error_messages) > 0:
            opt.print_help()
            for error_message in error_messages:
                print(error_message)
            return False
        else:
            return True
    except Exception as ex:
        print("exception :{0}".format(str(ex)))
        return False
def read_log():
    global MAX_QUERY_TIME_SECONDS
    file_path = SLOW_LOG_PATH
    file_handler = open(file_path)
    line = file_handler.readline()
    line_num = 1
    find_event_count = 0
    current_event_info = EventInfo()
    current_time = ""
    has_time = False
    while line:
        if line.startswith("# Time:"):
            current_time = get_datetime_by_time_line(line)
            if current_time < START_DATETIME:
                line = file_handler.readline()
                line_num += 1
                continue
            if current_time > END_DATETIME:
                break
            line = file_handler.readline()
            line_num += 1
            has_time = True
            continue
        if has_time:
            if line.startswith("# User@Host:"):
                if current_event_info.query_time_seconds > MAX_QUERY_TIME_SECONDS:
                    print_event_info(event_info=current_event_info)
                    find_event_count += 1
                current_event_info = EventInfo()
                current_event_info.file_line_num = line_num
                current_event_info.exec_time = current_time
                set_login_user(event_info=current_event_info,
                               login_user_line=line)
            elif line.startswith("# Query_time:"):
                set_query_info(event_info=current_event_info,
                               query_info_line=line)
            else:
                set_command_text(event_info=current_event_info,
                                 command_text_line=line)
        line = file_handler.readline()
        line_num += 1
    print("*" * 70)
    print("parse completely")
    print("find {0} events".format(find_event_count))
def main():
    if get_user_paras():
        read_log()
if __name__ == '__main__':
    main()

脚本调用方式如:

C:\Python27\python.exe D:/GGA/Code/slow_query_find/find_slow_quer.py --slow_log="D:\slow.log.2016010900" --long_query_seconds=4 --start_datetime="2016-01-08 15:46:00" --end_datetime="2016-01-08 15:50:00"

显示结果如:

##==================================================================##