#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: changjun

from subprocess import *
import os, re, sys
import time
import json, re
import ctypes
import getopt
import traceback

# Human-readable diagnosis strings stored in the 'reason' field of an OOM record.
OOM_REASON_CGROUP = 'cgroup memory limit'
OOM_REASON_PCGROUP = 'parent cgroup memory limit'
OOM_REASON_HOST = 'host memory limit'
OOM_REASON_MEMLEAK = 'host memory limit,may caused by memory leak'
OOM_REASON_NODEMASK = 'mempolicy not allowed process to use all the memory of NUMA system'
OOM_REASON_NODE = 'cpuset cgroup not allowed process to use all the memory of NUMA system'
OOM_REASON_MEMFRAG = 'memory fragment'
# Must be a plain string like its siblings: a trailing comma here would
# silently turn the value into the 1-tuple ('sysrq',).
OOM_REASON_SYSRQ = 'sysrq'
OOM_REASON_OTHER = 'other'


# Marker substrings used to locate the interesting lines of a kernel OOM report.
OOM_BEGIN_KEYWORD = "invoked oom-killer"
OOM_END_KEYWORD = "Killed process"
OOM_END_KEYWORD_4_19 = "reaped process"
OOM_END_KEYWORD_5_10 = "oom-kill:constraint"
OOM_CGROUP_KEYWORD = "Task in /"
OOM_NORMAL_MEM_KEYWORD = "Normal: "
OOM_PID_KEYWORD = "[  pid  ]"

# Regex literals are raw strings so that "\[", "\d", "\S" ... are not treated
# as (invalid) string escapes; the compiled patterns are unchanged.
# One row of the per-task dump: "[pid]" followed by seven numbers and a name.
pid_pattern = re.compile(r"\[\d+\]\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\S+")

# Day-of-week and month tokens (English and Chinese locale spellings) used to
# recognise and convert human-readable dmesg timestamps.
WEEK_LIST = ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']
CWEEK_LIST = ['一','二','三','四','五','六','日']
MONTH_LIST = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
CMONTH_LIST = ['1月','2月','3月','4月','5月','6月','7月','8月','9月','10月','11月','12月']

# group(1): cpuset name, group(2): allowed memory node list.
mems_pattern = re.compile(r"cpuset=(.*)[ ,]+mems_allowed=([0-9\-\/\,]*)")
# group(1): node list of the nodemask, or "null".
node_pattern = re.compile(r"nodemask=\(?([0-9\-\/\,null]*)\)?")

# Installation root taken from the environment; empty string when unset.
work_path = os.environ.get("SYSAK_WORK_PATH", "")
# Handle to libpodinfo.so; stays None until set_podinfo_lib() loads it.
podinfo_lib = None

# Python 2 only: make utf8 the default codec for implicit str/unicode conversions.
if sys.version.startswith('2'):
    reload(sys)
    sys.setdefaultencoding('utf8')


def set_podinfo_lib():
    """Load libpodinfo.so and declare the C signatures used by this module.

    The library is looked up under ``<work_path>/tools`` and the resulting
    handle is stored in the module-level ``podinfo_lib``.
    """
    global podinfo_lib
    so_path = work_path + "/tools/libpodinfo.so"
    podinfo_lib = ctypes.CDLL(so_path)

    # Both listing entry points return a C string.
    podinfo_lib.CListPods.restype = ctypes.c_char_p
    podinfo_lib.CGetContainerInfos.restype = ctypes.c_char_p

    # void SetLogLevel(int)
    set_level = podinfo_lib.SetLogLevel
    set_level.argtypes = [ctypes.c_int]
    set_level.restype = None

    podinfo_lib.FreeCString.argtypes = [ctypes.c_char_p]

def get_con_by_cgroup(cg_path):
    """Return the container record whose cgroup is a prefix of *cg_path*.

    The container list is fetched as JSON from ``podinfo_lib``.

    Args:
        cg_path: cgroup path taken from the OOM report.

    Returns:
        The first matching container dict, or ``{}`` when the library returns
        nothing or no container matches.
    """
    raw = podinfo_lib.CGetContainerInfos()
    if not raw:
        return {}

    # A JSON "null" payload decodes to None; treat it as an empty list.
    containers = json.loads(raw.decode('utf-8')) or []

    for con in containers:
        prefix = con.get('ContainerCgroup')
        # Skip records without a cgroup: None would make startswith() raise
        # TypeError and an empty string would match every path.
        if prefix and cg_path.startswith(prefix):
            return con

    return {}

def get_pod_by_cgroup(cg_path):
    """Return the pod record whose parent cgroup is a prefix of *cg_path*.

    The pod list is fetched as JSON from ``podinfo_lib``.

    Args:
        cg_path: cgroup path taken from the OOM report.

    Returns:
        The first matching pod dict, or ``{}`` when the library returns
        nothing or no pod matches.
    """
    raw = podinfo_lib.CListPods()
    if not raw:
        return {}

    # A JSON "null" payload decodes to None; treat it as an empty list.
    pods = json.loads(raw.decode('utf-8')) or []

    for pod in pods:
        parent = pod.get('CgroupParent')
        # Skip records without a parent cgroup: None would make startswith()
        # raise TypeError and an empty string would match every path.
        if parent and cg_path.startswith(parent):
            return pod

    return {}

def set_to_list(setstr):
    """Expand a kernel node/cpu list such as ``"0-2,5"`` into ``[0, 1, 2, 5]``.

    Items are comma separated; an item may be a single number, an inclusive
    ``low-high`` range, or wrapped in parentheses. An item containing
    ``null`` appends ``-1`` and stops the parsing. Items that cannot be parsed
    are reported on stderr and skipped.

    Args:
        setstr: the textual list.

    Returns:
        list of int.
    """
    resset = []
    for item in setstr.split(','):
        try:
            item = item.strip()
            if not item:
                continue
            if item[0] == '(' and item[-1] == ')':
                item = item[1:-1]
            if 'null' in item:
                resset.append(-1)
                break
            if '-' in item:
                bounds = item.split('-')
                resset.extend(range(int(bounds[0]), int(bounds[1]) + 1))
            else:
                resset.append(int(item))
        except Exception as err:
            # format_exc() returns the traceback text; print_exc() returns
            # None and would put the literal "None" into the message.
            sys.stderr.write("set_to_list loop err {} lines {}\n".format(err, traceback.format_exc()))
            continue
    return resset

def bignum_to_num(ori_num):
    """Convert a size with a unit suffix (``"12kB"``, ``"3M"``, ``"2g"``) to bytes.

    Args:
        ori_num: size text. Values that are not text, or text that cannot be
            parsed, are returned unchanged.

    Returns:
        The byte count as a decimal string when a known suffix was found,
        otherwise *ori_num* itself.
    """
    kilo = 1024
    mega = 1024 * 1024
    giga = 1024 * 1024 * 1024
    try:
        ret_num = ori_num
        # Kilobyte spellings are mutually exclusive: the first match wins.
        for suffix in ('kB', 'KB', 'k', 'K'):
            if suffix in ori_num:
                ret_num = str(int(ori_num.rstrip(suffix)) * kilo)
                break
        # The remaining units are checked independently, in this order.
        for suffix, factor in (('M', mega), ('G', giga), ('m', mega), ('g', giga)):
            if suffix in ori_num:
                ret_num = str(int(ori_num.rstrip(suffix)) * factor)
        return ret_num
    except (TypeError, ValueError, AttributeError):
        # Non-text input (e.g. the integer 0) or an unparsable number:
        # hand the value back untouched. Narrow on purpose so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return ori_num

def oom_get_ts(oom_time):
    """Parse a ``seconds.fraction`` timestamp; return 0 when there is no dot."""
    return float(oom_time) if "." in oom_time else 0

def oom_get_ymdh(oom_time):
    """Convert a ``Wed Jan 5 10:00:00 2022`` style timestamp to epoch seconds.

    The first token selects the English or the Chinese month table. Text
    without a colon yields 0; an unknown first token is passed on as an
    empty string to ``normal_time2ts``.
    """
    if ":" not in oom_time:
        return 0
    fields = oom_time.split()
    weekday = fields[0]
    if weekday in WEEK_LIST:
        months = MONTH_LIST
    elif weekday in CWEEK_LIST:
        months = CMONTH_LIST
    else:
        return normal_time2ts("")
    # fields: weekday, month, day, HH:MM:SS, year
    stamp = "%s-%02d-%02d %s" % (fields[4], months.index(fields[1]) + 1, int(fields[2]), fields[3])
    return normal_time2ts(stamp)

def oom_time_to_normal_time(oom_time):
    """Convert an OOM timestamp in either supported format to a number.

    Text containing ``:`` is handled by ``oom_get_ymdh``; otherwise text
    containing ``.`` is handled by ``oom_get_ts``.

    Returns:
        The converted timestamp, or 0 when the text is blank, matches neither
        format, or cannot be parsed.
    """
    if not oom_time.strip():
        return 0
    try:
        if ":" in oom_time:
            return oom_get_ymdh(oom_time)
        if "." in oom_time:
            return oom_get_ts(oom_time)
    except Exception:
        return 0
    # Neither format matched: return 0 like every other failure path instead
    # of falling off the end with None.
    return 0

def normal_time2ts(oom_time):
    """Convert ``YYYY-mm-dd HH:MM:SS`` local time to epoch seconds (float).

    Text shorter than 8 characters yields 0.
    """
    if len(oom_time) >= 8:
        parsed = time.strptime(oom_time, "%Y-%m-%d %H:%M:%S")
        return float(time.mktime(parsed))
    return 0

def oomcheck_get_spectime(time, oom_result):
    """Return the 1-based index of the OOM event closest in time to *time*.

    Ties go to the later event. When no event is within the initial window,
    ``oom_total_num + 1`` is returned. Any error is reported on stderr and
    the function then returns None.
    """
    try:
        total = oom_result['oom_total_num']
        best_gap = 3153600000
        best_idx = total
        for idx in range(oom_result['oom_total_num']):
            gap = abs(time - oom_result['sub_msg'][idx + 1]['time'])
            if gap <= best_gap:
                best_gap, best_idx = gap, idx
        return best_idx + 1
    except Exception as err:
        sys.stderr.write("oomcheck_spectime error {}\n".format(err))

def oom_is_node_num(line):
    """Tell whether *line* is a hugepage line for the 1048576 page size."""
    return line.find("hugepages_size=1048576") != -1

def oom_get_mem_allowed(oom_result, line, num):
    """Record the cpuset name and allowed memory nodes found in *line*.

    Does nothing when the line does not match ``mems_pattern``.
    """
    match = mems_pattern.search(line)
    if match is None:
        return
    cpuset, allowed = match.group(1, 2)
    record = oom_result['sub_msg'][num]
    record['mems_allowed'] = set_to_list(allowed)
    record['cpuset'] = cpuset

def oom_is_host_oom(reason):
    """Tell whether *reason* is the host-memory-limit reason."""
    is_host = (reason == OOM_REASON_HOST)
    return is_host

def oom_get_pid(oom_result, line, num):
    """Store the pid of the killed task found in *line*.

    Lines containing one of the "Killed process"/"reaped process" markers
    carry the pid as the first token after the marker; any other line is
    expected to contain ``pid=<n>,``.
    """
    for marker in (OOM_END_KEYWORD, OOM_END_KEYWORD_4_19):
        if marker in line:
            pid = line.strip().split(marker)[1].strip().split()[0]
            break
    else:
        pid = line.split("pid=")[1].split(",")[0]
    oom_result['sub_msg'][num]['pid'] = pid

def oom_get_task_mem(oom_result, line, num):
    """Store the killed task's memory (anon + file + shmem rss, divided by 1024).

    Each ``<kind>-rss:<size>`` field missing from *line* counts as 0.
    """
    text = line.strip()
    raw_sizes = []
    for label in ('anon-rss', 'file-rss', 'shmem-rss'):
        size = 0
        if label in line:
            size = text.split(label + ':')[1].split()[0].strip(',')
        raw_sizes.append(size)
    total_bytes = sum(int(bignum_to_num(size)) for size in raw_sizes)
    oom_result['sub_msg'][num]['killed_task_mem'] = total_bytes/1024

def oom_get_host_mem(oom_result, line, num, nodename):
    """Mark the event as a host OOM and store the zone's free/low values.

    *nodename* is the ``"<zone> free:"`` marker that precedes the free value;
    the low watermark is the token following ``low:``. Both are stored as
    they appear in the line.
    """
    record = oom_result['sub_msg'][num]
    record['reason'] = OOM_REASON_HOST
    record['type'] = 'host'
    text = line.strip()
    free_text = text.split(nodename)[1].split()[0]
    low_text = text.split('low:')[1].split()[0]
    record['host_free'] = free_text
    record['host_low'] = low_text

def oom_get_cgroup_mem(oom_result, line, num):
    """Store the cgroup's usage and limit from a ``memory: usage X, limit Y`` line."""
    text = line.strip()

    def _token_after(marker):
        # First whitespace-delimited token following the marker, minus commas.
        return text.split(marker)[1].split()[0].strip(',')

    usage = _token_after('memory: usage')
    limit = _token_after('limit')
    record = oom_result['sub_msg'][num]
    record['cg_usage'] = usage
    record['cg_limit'] = limit

def oom_get_cgroup_shmem(oom_result, line, num):
    """Store the cgroup's inactive_anon / active_anon / rss figures.

    Two layouts are recognised:
      * ``name:<value>KB`` pairs - values are stored as text with the
        two-character unit removed;
      * ``name <value>`` pairs - values are stored as numbers divided by 1024.
    When neither layout is present all three fields stay at "0".
    """
    record = oom_result['sub_msg'][num]
    record['cg_inanon'] = "0"
    record['cg_anon'] = "0"
    record['cg_rss'] = '0'

    text = line.strip()
    if "inactive_anon:" in line:
        after_inactive = text.split("inactive_anon:")[1]
        inanon = after_inactive.split()[0][:-2]
        anon = after_inactive.strip().split("active_anon:")[1].split()[0][:-2]
        rss = text.split("rss:")[1].split()[0][:-2]

        record["cg_inanon"] = inanon
        record["cg_anon"] = anon
        record["cg_rss"] = rss
    elif "inactive_anon" in line:
        after_inactive = text.split("inactive_anon")[1]
        inanon = after_inactive.split()[0]
        anon = after_inactive.strip().split("active_anon")[1].split()[0]
        # In this layout the rss figure is the token after the first "anon".
        rss = text.split("anon")[1].split()[0]

        record["cg_inanon"] = int(inanon) / 1024
        record["cg_anon"] = int(anon) / 1024
        record["cg_rss"] = int(rss) / 1024

def oom_get_cgroup_name(oom_result, line, num, is_510 = 0):
    """Record which cgroup the OOM belongs to and classify the event.

    With *is_510* set, the line is an ``oom-kill:constraint=...`` summary and
    is only used when it carries ``CONSTRAINT_MEMCG``. Otherwise the line is
    a ``Task in <cgroup> ... <limit cgroup>`` line. The reason becomes the
    parent-cgroup one when the task's cgroup differs from the limiting one.
    """
    if is_510:
        if "CONSTRAINT_MEMCG" not in line:
            return
        record = oom_result['sub_msg'][num]
        record['reason'] = OOM_REASON_CGROUP
        record['type'] = 'cgroup'
        task_cg = line.split("task_memcg=")[1].split(",")[0]
        limit_cg = line.split("oom_memcg=")[1].split(",")[0]
        if limit_cg != task_cg:
            record['reason'] = OOM_REASON_PCGROUP
        record['cg_name'] = task_cg
        return

    record = oom_result['sub_msg'][num]
    host_limited = "limit of host" in line
    if host_limited:
        record['type'] = 'host'
    else:
        record['reason'] = OOM_REASON_CGROUP
        record['type'] = 'cgroup'
    tokens = line.strip().split("Task in")[1].strip().split()
    task_cg, limit_cg = tokens[0], tokens[-1]
    if not host_limited and task_cg != limit_cg:
        record['reason'] = OOM_REASON_PCGROUP
    record['cg_name'] = task_cg
        
def oom_get_cgroup_name_api(line, is_510 = 0):
    """Return ``(task cgroup, limiting cgroup)`` parsed from *line*.

    With *is_510* set the pair comes from the ``task_memcg=``/``oom_memcg=``
    fields, and None is returned when the line has no ``CONSTRAINT_MEMCG``.
    Otherwise the pair is the first and last token after ``Task in``.
    """
    if not is_510:
        tokens = line.strip().split("Task in")[1].strip().split()
        return tokens[0], tokens[-1]
    if "CONSTRAINT_MEMCG" not in line:
        return None
    task_cg = line.split("task_memcg=")[1].split(",")[0]
    limit_cg = line.split("oom_memcg=")[1].split(",")[0]
    return task_cg, limit_cg

def oom_get_order(oom_result, line, num):
    """Store the allocation order from an ``... order=N, ...`` line."""
    token = line.strip().split("order=")[1].split()[0]
    # The token carries one trailing separator character, e.g. "3,".
    oom_result['sub_msg'][num]['order'] = int(token[:-1])

def oom_get_nodemask(oom_result, line, num):
    """Store the node list of the ``nodemask=`` field found in *line*.

    Does nothing when the line does not match ``node_pattern``.
    """
    match = node_pattern.search(line)
    if match is None:
        return
    oom_result['sub_msg'][num]['nodemask'] = set_to_list(match.group(1))

def oom_set_node_oom(oom_result, num, node_num):
    """Reclassify a host OOM as a node OOM when not all nodes were allowed.

    Applies only when the current reason is the host one and the number of
    allowed memory nodes differs from *node_num*.
    """
    record = oom_result['sub_msg'][num]
    allowed_nodes = record['mems_allowed']
    if not oom_is_host_oom(record['reason']):
        return
    if len(allowed_nodes) != node_num:
        record['reason'] = OOM_REASON_NODE
        record['type'] = 'node'

def oom_get_hugepage(oom_result, line, num):
    """Add ``hugepages_total * hugepages_size`` of *line* to the hugepage total.

    Returns True when the line carries no hugepage information; otherwise
    the total in ``meminfo['hugepage']`` is updated and nothing is returned.
    """
    if 'hugepages_total' not in line or 'hugepages_size' not in line:
        return True
    meminfo = oom_result['sub_msg'][num]['meminfo']
    meminfo.setdefault('hugepage', 0)

    page_count = int(line.split('hugepages_total=')[1].strip().split()[0])
    # The size is the rest of the line with its two-character unit removed.
    page_size = int(line.split('hugepages_size=')[1].strip()[:-2])
    meminfo['hugepage'] += page_count * page_size
    #print("hugetotal: {} size:{}".format(hugetotal, hugesize))

# One pattern per consecutive line of the kernel's memory summary, in the
# order the lines appear. Odd groups are field names, even groups their values.
meminfo_pattern = [
    re.compile(r"(active_anon):(\S+) (inactive_anon):(\S+) (isolated_anon):(\S+)"),
    re.compile(r"(active_file):(\S+) (inactive_file):(\S+) (isolated_file):(\S+)"),
    re.compile(r"(unevictable):(\S+) (dirty):(\S+) (writeback):(\S+)"),
    re.compile(r"(slab_reclaimable):(\S+) (slab_unreclaimable):(\S+)"),
    re.compile(r"(mapped):(\S+) (shmem):(\S+) (pagetables):(\S+) bounce:\S+"),
    re.compile(r"(free):(\S+) (free_pcp):(\S+) (free_cma):(\S+)"),
]


def oom_get_meminfo(oom_result, lines, index, num):
    """Fill ``meminfo`` of OOM record *num* from the kernel memory summary.

    The summary is located by the first line at or after *index* that holds
    both ``active_anon:`` and ``inactive_anon:``; that line and the five
    following ones are matched against ``meminfo_pattern``. Every value is
    multiplied by 4 before being stored. ``slab_unreclaimable`` is stored as
    ``slab`` and ``slab_reclaimable`` as ``slabr``; other fields keep their
    name.

    Args:
        oom_result: global result dict.
        lines: lines of the OOM report.
        index: position to start searching from.
        num: key of the OOM record in ``oom_result['sub_msg']``.

    Returns:
        Always True. The known fields are reset to 0 even when no summary is
        found.
    """
    meminfo = oom_result['sub_msg'][num]['meminfo']
    for field in ('slab', 'slabr', 'active_anon', 'inactive_anon',
                  'active_file', 'inactive_file', 'unevictable', 'pagetables',
                  'free', 'free_pcp', 'rmem', 'hugepage', 'total_mem'):
        meminfo[field] = 0
    if len(lines) < 10:
        return True

    start = None
    for pos in range(index, len(lines)):
        if 'active_anon:' in lines[pos] and 'inactive_anon:' in lines[pos]:
            start = pos
            break
    # Not found (this includes an index beyond the end of the report), or not
    # enough lines left for the whole summary.
    if start is None or start >= len(lines) - 5:
        return True

    for offset, pattern in enumerate(meminfo_pattern):
        gp = pattern.search(lines[start + offset])
        if not gp:
            continue
        for i in range(1, len(gp.groups()), 2):
            name = gp.group(i)
            value = int(gp.group(i + 1)) * 4
            # elif chain: the slab fields are stored only under their short
            # names and no longer leak through under their raw name as well.
            if name == 'slab_unreclaimable':
                meminfo['slab'] = value
            elif name == 'slab_reclaimable':
                meminfo['slabr'] = value
            else:
                meminfo[name] = value
    return True

def oom_get_total_mem(oom_result, line, num):
    """Store total memory from an ``<n> pages RAM`` line (page count times 4).

    Always returns True; lines without ``pages RAM`` are ignored.
    """
    if "pages RAM" in line:
        pages = int(line.strip().split()[-3])
        oom_result['sub_msg'][num]['meminfo']['total_mem'] = pages * 4
    return True

def oom_get_rmem(oom_result, line, num):
    """Store reserved memory from an ``<n> pages reserved`` line.

    The value (page count times 4) is saved as ``rmem`` and subtracted from
    ``total_mem``. Returns True when the line is not a reserved-pages line,
    nothing otherwise.
    """
    if "pages reserved" not in line:
        return True
    reserved = int(line.strip().split()[-3]) * 4
    meminfo = oom_result['sub_msg'][num]['meminfo']
    meminfo['rmem'] = reserved
    meminfo['total_mem'] -= reserved

def oom_is_cgroup_oom(cgroup):
    """Tell whether the given reason is one of the two cgroup-limit reasons."""
    return cgroup in (OOM_REASON_PCGROUP, OOM_REASON_CGROUP)


def oom_costly_order(order):
    """Tell whether *order* lies in the range 1..3 (inclusive)."""
    return 1 <= order <= 3

def oom_is_memfrag_oom(oom):
    """Tell whether the OOM looks like memory fragmentation.

    That is the case when free memory is above the low watermark and the
    failed allocation order satisfies ``oom_costly_order``.

    Args:
        oom: OOM record holding ``host_free``, ``host_low`` and ``order``.
            The memory values may be numbers or text such as ``"1234kB"``.

    Returns:
        bool
    """
    def _as_number(value):
        # Values are stored as they appear in the report ("1234kB"); comparing
        # that text directly would be lexicographic ("9kB" > "10000kB").
        if isinstance(value, str):
            return int(value.strip().rstrip('kKB'))
        return value

    order = oom['order']
    try:
        free = _as_number(oom['host_free'])
        low = _as_number(oom['host_low'])
    except ValueError:
        # Unexpected format: fall back to comparing the raw values.
        free = oom['host_free']
        low = oom['host_low']
    return bool(free > low and oom_costly_order(order))


def memleak_check(total, kmem):
    """Tell whether *kmem* is suspiciously large relative to *total*.

    Both arguments are divided by 1024 first. The value is flagged when it
    exceeds a fixed threshold (1024*6, or 1024*10 when the scaled total is
    above 100*1024), or when it exceeds both 10% of the total and 1024*1.5.
    """
    kmem_scaled = kmem/1024
    total_scaled = total/1024

    # Larger hosts (above 100G) get the higher absolute threshold.
    limit = 1024*10 if total_scaled > 100*1024 else 1024*6

    if kmem_scaled > limit:
        return True
    return (kmem_scaled > total_scaled*0.1) and (kmem_scaled > 1024*1.5)

def tcp_mem_check(used):
    """Run the skcheck tool and report heavy socket memory usage.

    skcheck writes its JSON report to /tmp/skcheck.json; the file is removed
    again whatever happens while reading it.

    Args:
        used: memory figure the socket usage is compared against.

    Returns:
        The parsed skcheck report (dict) when ``queue_total`` or ``tcp_mem``
        exceeds 10% of *used*; otherwise the empty string, which is also
        returned when the report is missing or unreadable.
    """
    json_path = "/tmp/skcheck.json"
    skcheck_bin = work_path
    skcheck_bin += "/tools/skcheck -j /tmp/skcheck.json > /dev/null 2>&1"
    # read() blocks until the command has finished; its output is discarded.
    os.popen(skcheck_bin).read()
    if not os.path.exists(json_path):
        return ''
    try:
        with open(json_path, 'r') as fp:
            skcheck = json.load(fp)
    except (IOError, OSError, ValueError):
        # Truncated or invalid report: behave as if there was none.
        return ''
    finally:
        # Always clean up so a stale report cannot be picked up by a later run.
        try:
            os.remove(json_path)
        except OSError:
            pass
    if skcheck["queue_total"] > used*0.1:
        return skcheck
    if skcheck["tcp_mem"] > used*0.1:
        return skcheck
    return ''


def oom_is_memleak(oom, oom_result):
    """Check the host memory breakdown for signs of a kernel memory leak.

    Three figures are tested with ``memleak_check``: unreclaimable slab, the
    memory not accounted for by any tracked category, and page tables. For
    the first two, ``tcp_mem_check`` is consulted as well. Findings are also
    written into ``oom['json']``.

    Args:
        oom: OOM record; needs ``meminfo`` (with ``slab``) and ``json``.
        oom_result: global result dict (not used here).

    Returns:
        A multi-line summary string when something suspicious was found,
        otherwise False.
    """
    if 'meminfo' not in oom:
        return False
    if 'slab' not in oom['meminfo']:
        return False
    meminfo = oom['meminfo']
    res = oom['json']
    summary = ''
    tcp = {}
    total = meminfo['total_mem']
    # Whatever is left after removing every tracked category.
    used = total - meminfo['slab'] - meminfo['slabr']
    used -= (meminfo['active_anon'] + meminfo['inactive_anon'])
    used -= (meminfo['active_file'] + meminfo['inactive_file'])
    used -= (meminfo['unevictable'] + meminfo['pagetables'])
    used -= (meminfo['free'] + meminfo['hugepage'])

    def _check_tcp():
        # Returns '' or the skcheck report; copies its key figures to the json.
        info = tcp_mem_check(used)
        if len(info) != 0:
            res["tcp_task"] = info["top_task"]
            res["tcp_mem"] = info["tcp_mem"]
        return info

    if memleak_check(total, meminfo['slab']):
        res['leaktype'] = 'slab'
        res['leakusage'] = meminfo['slab']
        tcp = _check_tcp()
        top = meminfo.get('topuslab') or []
        # The detailed message names the two biggest slab caches; fall back to
        # the plain one when fewer than two were collected instead of raising
        # IndexError.
        if len(top) >= 2:
            summary = "slab memleak, usage:%dKB(%s:%sKB, %s:%sKB)\n"%(meminfo['slab'], top[0][0],top[0][1]['total'],
                    top[1][0],top[1][1]['total'])
        else:
            summary = "slab memleak, usage:%dKB\n"%(meminfo['slab'])
    elif memleak_check(total, used):
        res['leaktype'] = 'allocpage'
        res['leakusage'] = used
        tcp = _check_tcp()
        summary = "allocpage memleak, usage:%dKB\n"%(used)
    if memleak_check(total, meminfo['pagetables']):
        res['leaktype'] = 'pagetables'
        res['leakusage'] = meminfo['pagetables']
        summary += "pagetables usage:%dKB indicates lots of processes or lots of mmaps\n"%(meminfo['pagetables'])
    if len(summary) != 0 and  len(tcp) != 0:
        summary += "tcp_task:%s tcp_mem:%sKB\n"%(tcp["top_task"][0], tcp["tcp_mem"])
    if len(summary) != 0:
        return summary
    return False

def oom_host_output(oom_result, num):
    """Build the summary text for a host-level OOM and refine its root cause.

    Returns an empty string for non-host OOMs. Otherwise the record's
    ``root`` (and possibly ``reason``) is updated to one of: cpuset limit,
    mempolicy nodemask, fragmentation, memory leak, or plain limit.
    """
    oom = oom_result['sub_msg'][num]
    reason = oom['reason']
    res = oom['json']
    if not oom_is_host_oom(reason):
        return ''

    # Stored values look like "<n>kB"; drop the two-character unit.
    free = int(oom['host_free'][:-2])
    low = int(oom['host_low'][:-2])
    is_low = free * 0.9  < low
    oom['root'] = 'limit'

    def _node_report(config_header, nodes):
        # Shared layout of the cpuset and nodemask reports.
        pieces = ["total node:%d\n"%(oom_result['node_num']), config_header]
        pieces.extend("%s "%(node) for node in nodes)
        pieces.append("\n")
        pieces.append("node free:%s,"%(oom['host_free']))
        pieces.append("low:%s\n"%(oom['host_low']))
        return "".join(pieces)

    if ('mems_allowed' in oom and oom['mems_allowed'][0] != -1
            and oom_result['node_num'] != len(oom['mems_allowed']) and is_low):
        oom['reason'] = OOM_REASON_NODE
        oom['root'] = 'cpuset'
        return _node_report("cpuset:%s,"%(oom['cpuset']) + "cpuset config:", oom['mems_allowed'])

    if ('nodemask' in oom and oom['nodemask'][0] != -1
            and len(oom['nodemask']) != oom_result['node_num'] and free > low * 2):
        oom['reason'] = OOM_REASON_NODEMASK
        oom['root'] = 'policy'
        return _node_report("nodemask config:", oom['nodemask'])

    summary = ''
    if oom_is_memfrag_oom(oom):
        summary += "order:%d\n"%(oom['order'])
        oom['reason'] = OOM_REASON_MEMFRAG
        oom['root'] = 'frag'
        oom['json']['order'] = oom['order']

    # A detected leak overrides the reason chosen so far.
    leak = oom_is_memleak(oom, oom_result)
    if leak != False:
        oom['reason'] = OOM_REASON_MEMLEAK
        oom['root'] = 'memleak'
        summary += leak
    summary += "host free:%s,"%(oom['host_free'])
    summary += "low:%s\n"%(oom['host_low'])

    res['host_free'] = oom['host_free']
    res['host_low'] = oom['host_low']
    return summary

def oom_cgroup_output(oom_result, num):
    """Build the summary text for a cgroup-level OOM.

    Returns an empty string for non-cgroup OOMs. The usage line is prefixed
    with "pod", "container" or "cgroup" depending on what was identified.
    Usage, limit and the per-cgroup OOM count are copied into the json.
    """
    oom = oom_result['sub_msg'][num]
    reason = oom['reason']
    res = oom['json']
    if not oom_is_cgroup_oom(reason):
        return ''

    if oom['podName'] != 'unknow':
        pre = 'pod'
    elif oom['containerID'] != 'unknow':
        pre = 'container'
    else:
        pre = "cgroup"

    summary = "".join([
        "%s memory usage: %s,"%(pre, oom['cg_usage']),
        " limit: %s\n"%(oom['cg_limit']),
        "oom cgroup: %s\n"%(oom['cg_name']),
    ])
    res['cg_usage'] = oom['cg_usage']
    res['cg_limit'] = oom['cg_limit']
    res['cgroup_oom_num'] = oom_result['cgroup'][oom['cg_name']]
    return summary

def oom_get_ipcs(oom_result, shmem):
    """Tell whether SysV shared memory accounts for most of *shmem*.

    Sums the second-to-last column of every data row of /proc/sysvipc/shm.
    Always False in mode 2 or when that file does not exist.
    """
    shm_file = '/proc/sysvipc/shm'
    if oom_result['mode'] == 2:
        return False
    if not os.path.exists(shm_file):
        return False
    with open(shm_file, 'r') as fd:
        rows = fd.readlines()

    # Skip the header row, recognised by its column names.
    ipcs = sum(int(row.split()[-2]) for row in rows
               if 'nattch' not in row and 'shmid' not in row)

    if ipcs > shmem:
        return False
    # NOTE(review): after the check above ipcs <= shmem, so this can only be
    # true for a non-positive shmem - the two comparisons look like they use
    # different units (the * 1024 is missing above?). Confirm the intent.
    return ipcs > shmem*0.55 * 1024

def oom_cgroup_output_ext(oom_result, num):
    """Return an extra diagnosis fragment when a cgroup OOM is shmem-driven.

    For cgroup OOMs the root becomes 'plimit' when the parent cgroup was the
    limiting one. If inactive_anon + active_anon - rss exceeds 30% of the
    cgroup usage, the root becomes 'shmem' and a hint (ipcs or tmpfs cleanup)
    is returned; otherwise the result is an empty string.
    """
    oom = oom_result['sub_msg'][num]
    reason = oom['reason']
    res = oom['json']
    if not oom_is_cgroup_oom(reason):
        return ''
    if oom['root'] == 'limit' and reason ==  OOM_REASON_PCGROUP:
        oom['root'] = 'plimit'

    anon = int(oom["cg_inanon"]) + int(oom["cg_anon"]) - int(oom["cg_rss"])
    if not anon > int(oom['cg_usage'][:-2])*0.3:
        return ''

    if oom_get_ipcs(oom_result, anon) == True:
        msg = "need to cleanup ipcs"
    else:
        msg = "need to cleanup tmpfs file"
    oom['root'] = 'shmem'
    res['shmem'] = anon
    return ",but shmem usage %dKB,%s"%(anon, msg)


def oom_check_score(oom, oom_result):
    """Check whether a bigger task escaped the kill thanks to oom_score_adj.

    Returns ``(True, message)`` when the largest task is not the killed one,
    has a negative oom_score_adj, and is not merely one of many same-named
    processes that together dominate; otherwise ``(False, "\\n")``.
    """
    top = oom_result['max']
    group = oom_result['max_total']

    if top['pid'] == 0:
        return False, "\n"
    if int(oom['pid'].strip()) == top['pid']:
        return False, '\n'
    if top['score'] >= 0:
        return False, "\n"

    # Several processes of one name that together clearly outweigh the top task.
    crowded = (group['cnt']) > 2 and (group['rss']*0.8 > top['rss'])
    if top['task'] != group['task'] and crowded:
        return False, "\n"
    return True, '，process:%s(%s) memory usage: %dKB,oom_score_adj:%s The lower the value of oom_score_adj, the lower the chance that it is going to be killed.\n'%(top['task'],top['pid'],top['rss']*4,top['score'])

def oom_check_dup(oom, oom_result):
    """Detect many same-named processes that together dominate memory.

    When more than two processes share a name and their combined rss (times
    4) exceeds 1.5x the killed task's memory, the root becomes 'fork', the
    figures are stored in the json and a description is returned; otherwise
    the result is a bare newline.
    """
    # Looked up but not used; kept so a missing 'max' entry still raises.
    _largest = oom_result['max']
    group = oom_result['max_total']

    dominated = (group['rss']*4 > oom['killed_task_mem']*1.5) and (group['cnt'] > 2)
    if not dominated:
        return '\n'

    oom['root'] = 'fork'
    summary = ',%d process:%s total memory usage: %dKB\n'%(group['cnt'],group['task'],group['rss']*4)
    stats = oom['json']
    stats['fork_max_task'] = group['task']
    stats['fork_max_cnt'] = group['cnt']
    stats['fork_max_usage'] = group['rss'] * 4
    return summary

def oom_get_podName(cgName, cID, oom_result):
    """Look up the Kubernetes pod name of container *cID* through crictl.

    Args:
        cgName: cgroup path of the OOM; only paths containing "kubepods" are
            looked up.
        cID: container id handed to ``crictl inspect``.
        oom_result: global result dict; nothing is executed in mode 2.

    Returns:
        The pod name, or 'unknow' when it cannot be determined.
    """
    podName = 'unknow'
    if oom_result['mode'] == 2:
        return podName
    if cgName.find("kubepods") == -1:
        return podName
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote
    # The id is cut out of a cgroup path found in the kernel log, so quote it
    # before it becomes part of a shell command line.
    cmd = "crictl inspect " + quote(cID) +" 2>/dev/null" +" | grep -w io.kubernetes.pod.name "
    res = os.popen(cmd).read().strip()
    if res.find("io.kubernetes.pod.name") == -1:
        return podName
    res = res.split()
    if len(res) < 2:
        return podName
    if res[0].find("io.kubernetes.pod.name") != -1:
        # Drop the opening quote and the closing quote plus trailing comma.
        podName = res[1][1:-2]
    return podName

def oom_get_containerID(cgName):
    """Return the 13 characters following a container-runtime prefix.

    ``cri-containerd-`` is tried first, then ``docker-``. The result is
    'unknow' when neither prefix occurs in *cgName*.
    """
    for prefix in ("cri-containerd-", "docker-"):
        pos = cgName.find(prefix)
        if pos != -1:
            start = pos + len(prefix)
            return cgName[start: start+13]
    return 'unknow'

def oom_get_k8spod(oom_result,num):
    """Resolve the pod and container of the OOM's cgroup and describe them.

    The names are looked up through ``podinfo_lib`` (defaulting to 'unknow'),
    stored on the OOM record and in its json, and returned as a one-line
    summary.
    """
    oom = oom_result['sub_msg'][num]
    res = oom['json']
    cgName = oom['cg_name']

    # Log level 0 mutes the Go-side logging during the lookups; it is set to
    # 4 again afterwards.
    podinfo_lib.SetLogLevel(0)
    pod_record = get_pod_by_cgroup(cgName)
    oom['podName'] = pod_record.get('Name', 'unknow')
    container_record = get_con_by_cgroup(cgName)
    oom['containerID'] = container_record.get('ContainerName', 'unknow')
    podinfo_lib.SetLogLevel(4)

    summary = '' + "podName: %s, containerID: %s\n"%(oom['podName'], oom['containerID'])
    res['podName'] = oom['podName']
    res['containerID'] = oom['containerID']
    return summary

def oom_get_memstat(oom_result, line, num, key, lines):
    """Find the memory.stat entry that dominates the cgroup's non-anon usage.

    Reads ``name value`` rows from ``lines[key:]`` until a "workingset" row
    or a row that does not parse. Values are divided by 1024. The first entry
    reaching 85% of (cgroup usage - inactive_anon - active_anon) is stored as
    ``cgroup_major_used`` on the OOM record.

    Args:
        oom_result: global result dict.
        line: unused; kept for the callers' signature.
        num: key of the OOM record in ``oom_result['sub_msg']``.
        key: index of the first stat row in *lines*.
        lines: lines of the OOM report.
    """
    oom = oom_result['sub_msg'][num]
    stats = {}
    while key < len(lines):
        row = lines[key]
        if "workingset" in row:
            break
        # \d+ (not \d*): a row without a number ends the table instead of
        # reaching int('') and raising ValueError.
        if "] anon " in row:
            gp = re.search(r"\] (\S*) (\d+)$", row)
        else:
            gp = re.match(r"^(\S*) (\d+)$", row)
        if not gp:
            break
        stats[gp.group(1)] = int(gp.group(2))/1024
        key += 1
    cg_usage = int(oom['cg_usage'][:-2])
    try:
        thresh = 0.85*(cg_usage - stats['inactive_anon'] - stats['active_anon'])
    except KeyError:
        # The anon counters were not in the table: nothing to compare against.
        return
    for name, value in stats.items():
        if value >= thresh:
            oom['cgroup_major_used'] = {'name':name, 'value':value}
            break

def oom_init_json(oom_result, num):
    """Attach a fresh ``json`` dict with default values to OOM record *num*.

    Text fields default to 'unknow', numeric fields to 0 and containers to
    empty ones.
    """
    oom_result['sub_msg'][num]['json'] = {
        'task': 'unknow',
        'pid': 'unknow',
        'task_mem': 0,
        'total_rss': 0,
        'root': 'unknow',
        'type': 'unknow',
        'podName': 'unknow',
        # Same 'unknow' marker as every other text default (it used to be
        # misspelled 'unkonw', which no comparison in this file matches).
        'containerID': 'unknow',
        'cg_usage': 0,
        'cg_limit': 0,
        'leaktype': 'unknow',
        'leakusage': 0,
        'shmem': 0,
        'tcp_mem': 0,
        'tcp_task': [],
        'cgroup_oom_num': 0,
        'meminfo': {},
    }

def oom_output_msg(oom_result,num, summary):
    """Assemble the final diagnosis text and json for OOM record *num*.

    The json of the record is (re)initialised, filled with the killed task,
    memory figures and the results of the cgroup/host analysis helpers, and
    the text is built in two parts: the one-line ``reason`` ("diagnones
    result: ...") and the multi-line ``summary``.

    Args:
        oom_result: global result dict.
        num: key of the OOM record in ``oom_result['sub_msg']``.
        summary: text to which the summary lines are appended.

    Returns:
        ``reason + summary``. The two parts are also stored in the json as
        ``result`` and ``msg``.
    """
    oom = oom_result['sub_msg'][num]

    oom_init_json(oom_result, num)
    oom['json']['rss_list_desc'] = oom['rss_list_desc']
    res = oom['json']
    res['total_oom'] = oom_result['oom_total_num']
    res['cg_name'] = oom['cg_name']
    res['host_free'] = oom.get('host_free',0)
    res['host_low'] = oom.get('host_low',0)
    res['meminfo'] = oom.get('meminfo',{})
    res['cgroup_major_used'] = oom.get('cgroup_major_used', {})
    reason = ''
    #print("oom time = {} spectime = {}".format(oom['time'], oom_result['spectime']))
    task = oom['task_name']
    task_mem = oom['killed_task_mem']
    # Fall back to the figure from the task dump when the kill line had none.
    if task_mem == 0 and oom['pid'] in oom['state_mem']:
        task_mem = oom['state_mem'][oom['pid']]
        oom['killed_task_mem'] = task_mem
    # task[1:-1] drops the first and last character of the stored task name.
    res['task'] = task[1:-1]
    res['pid'] = oom['pid']
    res['task_mem'] = task_mem
    res['total_rss'] = oom['state_mem']['total_rss']
    summary += "total rss: %d KB\n"%(oom['state_mem']['total_rss'])
    summary += "task: %s(%s), memory usage: %sKB\n"%(task[1:-1], oom['pid'], task_mem)
    #summary += "process kill count:%s, process memory usage:%sKB\n"%(oom_result['task'][task], oom['killed_task_mem']/1024)
    #summary += "oom cgroup:%s"%(oom['cg_name'])
    # Default root cause; the helpers below may replace it.
    oom['root'] = 'limit'
    if oom['cg_name'] in oom_result['cgroup']:
        #summary += "total oom count:%s\n"%(oom_result['cgroup'][oom['cg_name']])
        summary += oom_get_k8spod(oom_result, num)
    summary += oom_cgroup_output(oom_result, num)
    #summary += "oom cgroup: %s\n"%(oom['cg_name'])
    summary += oom_host_output(oom_result, num)
    # oom['reason'] is read only now because oom_host_output may have changed it.
    reason = "diagnones result: %s "%(oom['reason'])
    reason += oom_cgroup_output_ext(oom_result, num)
    if 'cmdline' in oom_result:
        reason += oom_result['cmdline']
    # Either the oom_score_adj explanation or the duplicate-process check
    # terminates the reason line.
    ret, sss = oom_check_score(oom, oom_result)
    if ret == False:
        reason+= oom_check_dup(oom, oom_result)
    else:
        reason += sss
    if oom['type']  == 'cgroup':
        if 'msg' in oom['state_mem']:
            summary += "memory stats:\n"
            for line in oom['state_mem']['msg']:
                summary += line +'\n'
    summary += "type: %s, root: %s\n"%(oom['type'], oom['root'])
    # 'kernelUsed' is one of the derived figures written by get_memgraph().
    if 'kernelUsed' in oom['meminfo']:
        summary += "mem info: total:%sKB, user used:%sKB, kernel used:%s KB\nuser file used:%sKB, user anon used:%s KB, kernel resevred:%sKB, kernel page used:%sKB kernel uslab:%sKB" %(
                oom['meminfo']['total_mem'],oom['meminfo']['userUsed'], oom['meminfo']['kernelUsed'],oom['meminfo']['active_file']+oom['meminfo']['inactive_file'], oom['meminfo']['active_anon']+oom['meminfo']['inactive_anon'],oom['meminfo']['rmem'],oom['meminfo']['allocPage'], oom['meminfo']['slab'])
    res['root'] = oom['root']
    res['type'] = oom['type']
    res['result'] = reason
    res['msg'] = summary
    return reason + summary

def oom_get_max_task(num, oom_result):
    """Scan the task dump of OOM record *num* for the biggest memory users.

    For every task row the rss is accumulated per task name, stored per pid
    (times 4) in ``state_mem`` and added to ``state_mem['total_rss']``.
    ``oom_result['max']`` tracks the single largest task and
    ``oom_result['max_total']`` the largest group of same-named tasks. The
    ten largest groups are saved as ``rss_list_desc`` on the record.

    Args:
        num: key of the OOM record in ``oom_result['sub_msg']``.
        oom_result: global result dict.

    Returns:
        ``oom_result['max']``.
    """
    oom = oom_result['sub_msg'][num]
    dump_task = False
    res = oom_result['max']
    res_total = oom_result['max_total']
    rss_all = {}
    state = oom['state_mem']
    state['msg'] = []
    state['total_rss'] = 0
    # Number of '[' expected on a task row; updated from the header row.
    s_bra = 2
    # Without a header row, treat every line as a potential task row.
    if 'oom_score_adj' not in '\n'.join(oom['oom_msg']):
        dump_task = True
    for line in oom['oom_msg']:
        try:
            if 'rss' in line and 'oom_score_adj' in line and 'name' in line:
                dump_task = True
                state['msg'].append(line)
                s_bra = line.count('[')
                continue
            if not dump_task:
                continue
            if 'Out of memory' in line:
                break
            if OOM_END_KEYWORD in line or OOM_END_KEYWORD_4_19 in line or OOM_END_KEYWORD_5_10 in line:
                break
            if line.count('[')  != s_bra:
                break
            pid_idx = line.rfind('[')
            last_idx = line.rfind(']')
            if pid_idx == -1 or last_idx == -1:
                continue
            pid = int(line[pid_idx+1:last_idx].strip())
            last = line[last_idx+1:].strip().split()
            # Column 3 (rss) is read below, so at least four columns are needed.
            if len(last) < 4:
                continue
            name = last[-1]
            # Parse before touching rss_all so that a malformed row cannot
            # leave a half-initialised entry that breaks the sort below.
            rss = int(last[3])
            entry = rss_all.setdefault(name, {'rss': 0, 'cnt': 0})
            entry['rss'] += rss
            entry['cnt'] += 1
            state['msg'].append(line)
            state[str(pid)] = rss *4
            state['total_rss'] += rss *4
            if rss >  res['rss']:
                res['rss'] = rss
                res['score'] = int(last[-2])
                res['task'] = name
                res['pid'] = pid
            if entry['rss'] >  res_total['rss']:
                res_total['rss'] = int(entry['rss'])
                res_total['cnt'] = int(entry['cnt'])
                res_total['score'] = int(last[-2])
                res_total['task'] = name
        except Exception as err:
            # format_exc() returns the traceback text; print_exc() returns
            # None and would put the literal "None" into the message.
            sys.stderr.write("oom_get_max_task loop err {} lines {}\n".format(err, traceback.format_exc()))
            continue
    oom['rss_all'] = rss_all
    tmprss = sorted(oom['rss_all'].items(),key=lambda k:k[1]['rss'], reverse=True)[0:10]
    oom['rss_list_desc'] = []
    for task_info in tmprss:
        task = task_info[0]
        oom['rss_list_desc'].append({'task':task, 'rss':oom['rss_all'][task]['rss']})
    return res

def get_memgraph(oom_result,num,summary):
    """Derive user/kernel usage figures and store them in the record's meminfo.

    Adds ``allocPage``, ``kernelUsed``, ``userUsed`` and ``kernelOther``.
    An empty meminfo is left untouched; any error only prints a notice.
    *summary* is not used.
    """
    try:
        meminfo = oom_result['sub_msg'][num]['meminfo']
        if len(meminfo) == 0:
            return

        anon_total = meminfo["active_anon"] + meminfo["inactive_anon"]
        file_total = meminfo["active_file"] + meminfo["inactive_file"]
        user = anon_total + file_total
        if "hugepage" in meminfo:
            user += meminfo["hugepage"]

        kernel_other = meminfo["slab"] + meminfo["slabr"]  + meminfo["pagetables"] + meminfo['unevictable']
        # Pages that belong to none of the tracked categories.
        page_used = meminfo["total_mem"] - meminfo["free"] - user - kernel_other
        if page_used < 1:
            page_used = 1024

        meminfo["allocPage"] = page_used
        meminfo["kernelUsed"] = page_used + kernel_other + meminfo["rmem"]
        meminfo["userUsed"] = user
        meminfo["kernelOther"] = kernel_other
    except Exception:
        print("Can not get memgraph!")


def oom_get_unslab_info(oom_result, line, num, key, lines):
    """Record the ten largest entries of the unreclaimable-slab table.

    The rows are read from ``lines`` starting two lines after index ``key``
    and parsing stops at the first row whose last two fields do not both end
    in ``KB``.  For every row the third-from-last field is the name and the
    last two fields (without the ``KB`` suffix) are stored as ``active`` and
    ``total``.  The result, sorted by ``total`` in descending order and cut
    to ten ``(name, info)`` pairs, is stored in
    ``oom_result['sub_msg'][num]['meminfo']['topuslab']``.

    ``line`` is accepted but not used.
    """
    slabs = {}
    for row in lines[key + 2:]:
        fields = row.split()
        if len(fields) < 3 or not fields[-1].endswith('KB') or not fields[-2].endswith('KB'):
            break
        active_kb = int(fields[-2][:-2])
        total_kb = int(fields[-1][:-2])
        slabs[fields[-3]] = {'active': active_kb, 'total': total_kb}
    ranked = sorted(slabs.items(), key=lambda entry: entry[1]['total'], reverse=True)
    oom_result['sub_msg'][num]['meminfo']['topuslab'] = ranked[0:10]

# Zone names probed by find_nodename(), in priority order.
zone_name = ["Normal","DMA32","Movable","DMA","Device","HighMem"]
def find_nodename(dmesglines):
    """Return the ``"<zone> free:"`` marker of the first matching memory zone.

    The zones in ``zone_name`` are tried in list order; the first one for
    which some line contains ``Node <digits> <zone> free`` wins.

    Args:
        dmesglines: iterable of log lines (strings).

    Returns:
        ``"<zone> free:"`` for the first zone found, or ``""`` when no zone
        line is present.
    """
    ori_dmesg = '\n'.join(dmesglines)
    for name in zone_name:
        # Raw string: "\d" in a normal literal is an invalid escape sequence
        # and triggers a SyntaxWarning on recent Python versions.
        if re.search(r"Node \d+ %s free.*" % name, ori_dmesg):
            return "%s free:" % name
    return ""

def oom_reason_analyze(num, oom_result, summary):
    """Parse the log lines of OOM record ``num`` and produce its summary.

    Every line of ``oom_result['sub_msg'][num]['oom_msg']`` is matched
    against the known kernel OOM markers and handed to the corresponding
    ``oom_get_*`` helper, which stores what it extracts in ``oom_result``.
    A failure while handling one line is logged to stderr and that line is
    skipped.  After the scan the memory breakdown is derived with
    ``get_memgraph``, the number of ``hugepages_total`` lines accepted by
    ``oom_is_node_num`` is stored in ``oom_result['node_num']`` and the text
    returned by ``oom_output_msg`` is stored as the record's ``summary``.

    Args:
        num: key of the record inside ``oom_result['sub_msg']``.
        oom_result: shared analysis state, updated in place.
        summary: text passed on to ``oom_output_msg``.

    Returns:
        The summary string (also printed unless ``oom_result['json'] == 1``),
        or ``""`` when the analysis as a whole fails; the failure is then
        logged to stderr.
    """
    try:
        nodename = find_nodename(oom_result['sub_msg'][num]['oom_msg'])
        node_num = 0
        lines = oom_result['sub_msg'][num]['oom_msg']
        line_len = len(lines)
        for key in range(line_len):
            try:
                line = lines[key]
                if "invoked oom-killer" in line:
                    oom_get_order(oom_result, line, num)
                elif OOM_END_KEYWORD_5_10 in line:
                    oom_get_cgroup_name(oom_result, line, num, 1)
                    oom_get_task_mem(oom_result, line, num)
                    oom_get_pid(oom_result, line, num)
                # The nodemask check is independent of the chains around it.
                if 'nodemask' in line:
                    oom_get_nodemask(oom_result, line, num)
                if "mems_allowed=" in line:
                    oom_get_mem_allowed(oom_result, line, num)
                elif "Task in" in line:
                    oom_get_cgroup_name(oom_result, line, num)
                elif "memory: usage" in line:
                    oom_get_cgroup_mem(oom_result, line, num)
                elif "Memory cgroup stats for" in line:
                    if line.find("anon") == -1:
                        # 5.10: extract the cgroup memory usage.  The header
                        # line carries no "anon" field, so join it with the
                        # following lines (at most 40 in total).
                        line = ' '.join([item.strip() for item in lines[
                                                                  key:key + min(
                                                                      40,
                                                                      line_len - key)]])
                    oom_get_cgroup_shmem(oom_result, line, num)
                elif len(nodename) != 0 and nodename in line:
                    oom_get_host_mem(oom_result, line, num, nodename)
                elif "Mem-Info:" in line:
                    oom_get_meminfo(oom_result, lines, key,num)
                elif "pages RAM" in line:
                    oom_get_total_mem(oom_result, line, num)
                elif "pages reserved" in line:
                    oom_get_rmem(oom_result, line, num)
                elif line.find('hugepages_total')!=-1:
                    if oom_is_node_num(line):
                        node_num += 1
                    oom_get_hugepage(oom_result, line, num)
                elif "] anon " in line:
                    oom_get_memstat(oom_result, line, num, key, lines)
                elif "Unreclaimable slab info" in line:
                    oom_get_unslab_info(oom_result, line, num, key, lines)
                elif OOM_END_KEYWORD in line or OOM_END_KEYWORD_4_19 in line:
                    oom_get_task_mem(oom_result, line, num)
                    oom_get_pid(oom_result, line, num)
            except Exception as err:
                # format_exc() returns the traceback text; print_exc() returns
                # None, which made the message end in "lines None".
                sys.stderr.write("oom_reason_analyze loop err {} lines {}\n".format(err, traceback.format_exc()))
                continue
        get_memgraph(oom_result,num,summary)
        oom_result['node_num'] = node_num
        if 'cgroup_major_used' in oom_result['sub_msg'][num]:
            oom_result['sub_msg'][num]['reason'] = '%s, %s used over 85%% memory (%dKB)' %(oom_result['sub_msg'][num]['reason'],oom_result['sub_msg'][num]['cgroup_major_used']['name'],oom_result['sub_msg'][num]['cgroup_major_used']['value'])
        summary = oom_output_msg(oom_result, num, summary)
        oom_result['sub_msg'][num]['summary'] = summary
        # In JSON mode nothing is printed here.
        if oom_result['json'] != 1:
            print(summary)
        return summary
    except Exception as err:
        sys.stderr.write("oom_reason_analyze err {} lines {}\n".format(err, traceback.format_exc()))
        return ""

def init_oomresult(oomresult_sub):
    """Fill ``oomresult_sub`` in place with the default fields of one OOM record.

    Existing values for these keys are overwritten.  The list and the two
    dicts are created anew on every call, so records never share them.
    """
    oomresult_sub.update({
        'oom_msg': [],
        'time': 0,
        'cg_name': 'unknow',
        'podName': 'unknow',
        'containerID': 'unknow',
        'cg_usage': 0,
        'cg_limit': 0,
        'task_name': '-unknow-',
        'pid': "0",
        'killed_task_mem': 0,
        'state_mem': {},
        'meminfo': {},
        'type': 'unknow',
        'root': 'unknow',
        'reason': '',
        'summary': '',
    })

def oom_dmesg_analyze(dmesgs, oom_result):
    """Split a kernel log into OOM records and count killed tasks and cgroups.

    The log is scanned line by line.  A record is opened in
    ``oom_result['sub_msg']`` (keyed by the running ``oom_total_num``) when

    * a line contains ``OOM_BEGIN_KEYWORD``,
    * an end marker is seen without an open record and at least ten lines
      after the previous end marker, or
    * a ``OOM_CGROUP_KEYWORD`` line is seen without an open record.

    For every record the raw lines are stored under ``oom_msg``, the killed
    task under ``task_name`` and, when known, the cgroup under
    ``cgroup_name``.  ``oom_result['task']`` and ``oom_result['cgroup']``
    count occurrences per name and ``oom_result['time']`` collects one time
    value per record.

    Args:
        dmesgs: the whole log as one string.
        oom_result: shared analysis state, updated in place.

    Any exception aborts the scan; it is reported on stderr and the state
    collected so far is kept.
    """
    def current():
        # Record that was opened most recently.
        return oom_result['sub_msg'][oom_result['oom_total_num']]

    def new_record(line, oom_msg=None):
        # Open a record with defaults, optionally attach its raw lines, and
        # stamp it with the time parsed from the first "[...]" of ``line``.
        oom_result['oom_total_num'] += 1
        record = {}
        oom_result['sub_msg'][oom_result['oom_total_num']] = record
        init_oomresult(record)
        if oom_msg is not None:
            record['oom_msg'] = oom_msg
        if line.find('[') != -1:
            record['time'] = oom_time_to_normal_time(line.split('[')[1].split(']')[0])
        oom_result['time'].append(record['time'])

    def count_cgroup(cgroup_name):
        current()['cgroup_name'] = cgroup_name
        if cgroup_name not in oom_result['cgroup']:
            oom_result['cgroup'][cgroup_name] = 1
        else:
            oom_result['cgroup'][cgroup_name] += 1

    try:
        meet_start = False
        start_i = -1
        end_i = 0
        dmesg = dmesgs.splitlines()
        task_name = "-unknow-"
        if task_name not in oom_result['task']:
            oom_result['task'][task_name] = 0
        # Use the enumerate index for all slicing: looking the stripped line
        # up with dmesg.index() finds the first duplicate instead of the
        # current line, fails when stripping changed the line, and is
        # quadratic.
        for idx, line in enumerate(dmesg):
            line = line.strip()
            if "Command line" in line:
                if "elfcorehdr" in line and "crashkernel" not in line:
                    elf = line.split("elfcorehdr=")[1].split()[0]
                    oom_result['cmdline'] = ". there is no space reserved in second kernel, elfcorehdr:%s" % elf
            if OOM_BEGIN_KEYWORD in line:
                if meet_start:
                    # The previous report never reached an end marker; close
                    # it right before this new one.
                    current()['oom_msg'] = dmesg[start_i:idx]
                meet_start = True
                start_i = idx
                new_record(line)
            if OOM_END_KEYWORD in line or OOM_END_KEYWORD_4_19 in line or OOM_END_KEYWORD_5_10 in line:
                if start_i >= 0:
                    current()['oom_msg'] = dmesg[start_i:idx + 1]
                elif idx - end_i < 10:
                    # Another end marker shortly after the previous one and
                    # with no open record: only move the end position.
                    meet_start = False
                    start_i = -1
                    end_i = idx
                    continue
                else:
                    # End marker without a start: the record covers the
                    # lines since the previous end marker.
                    new_record(line, dmesg[end_i + 1:idx + 1])
                meet_start = False
                start_i = -1
                end_i = idx
                if OOM_END_KEYWORD_4_19 in line:
                    OOM_END_KEYWORD_real = OOM_END_KEYWORD_4_19
                elif OOM_END_KEYWORD_5_10 in line:
                    OOM_END_KEYWORD_real = OOM_END_KEYWORD_5_10
                else:
                    OOM_END_KEYWORD_real = OOM_END_KEYWORD
                if OOM_END_KEYWORD_5_10 in line:
                    task_name = "(" + line.split(OOM_END_KEYWORD_real)[1].split("task=")[1].split(",")[0] + ")"
                    count_cgroup(line.split('task_memcg=')[1].split(",")[0])
                else:
                    task_name = line.split(OOM_END_KEYWORD_real)[1].split()[1].strip(',')
                current()['task_name'] = task_name
                if task_name not in oom_result['task']:
                    oom_result['task'][task_name] = 1
                else:
                    oom_result['task'][task_name] += 1
            if OOM_CGROUP_KEYWORD in line:
                if start_i < 0 :
                    new_record(line)
                    start_i = idx
                count_cgroup(line.split('Task in')[1].split()[0])
        if meet_start:
            # The log ended inside a report: keep everything from its start.
            current()['oom_msg'] = dmesg[start_i:]
    except Exception as err:
        traceback.print_exc()
        sys.stderr.write("oom_dmesg_analyze failed {}\n".format(err))

def oom_dmesg_analyze2(dmesgs, oom_result):
    """Collect OOM records from a log, using only start markers to open them.

    Nothing happens when ``dmesgs`` contains no ``OOM_BEGIN_KEYWORD``.
    Otherwise every line containing that keyword opens a record in
    ``oom_result['sub_msg']`` and the stripped lines up to and including the
    next end marker are appended to its ``oom_msg``.  The killed task and the
    cgroup are stored on the record and counted in ``oom_result['task']`` and
    ``oom_result['cgroup']``.

    Any exception aborts the scan and is reported on stderr.
    """
    try:
        end_keyword = OOM_END_KEYWORD
        if OOM_BEGIN_KEYWORD not in dmesgs:
            return
        collecting = False
        task_name = "-unknow-"
        oom_result['task'].setdefault(task_name, 0)

        def note_cgroup(record, cgroup_name):
            # Remember the cgroup on the record and bump its counter.
            record['cgroup_name'] = cgroup_name
            oom_result['cgroup'][cgroup_name] = oom_result['cgroup'].get(cgroup_name, 0) + 1

        for raw_line in dmesgs.splitlines():
            line = raw_line.strip()
            if line and OOM_BEGIN_KEYWORD in line:
                oom_result['oom_total_num'] += 1
                collecting = True
                # task_name still holds the value of the previous report, if
                # there was one.
                record = {
                    'oom_msg': [],
                    'time': 0,
                    'cg_name': 'unknow',
                    'cg_usage': 0,
                    'cg_limit': 0,
                    'task_name': task_name,
                    'pid': "0",
                    'killed_task_mem': 0,
                    'state_mem': {},
                    'meminfo': {},
                    'type': 'unknow',
                    'root': 'unknow',
                    'reason': '',
                    'summary': '',
                }
                oom_result['sub_msg'][oom_result['oom_total_num']] = record
                if line.find('[') != -1:
                    record['time'] = oom_time_to_normal_time(line.split('[')[1].split(']')[0])
                oom_result['time'].append(record['time'])
            if not collecting:
                continue

            record = oom_result['sub_msg'][oom_result['oom_total_num']]
            record['oom_msg'].append(line)
            if OOM_END_KEYWORD in line or OOM_END_KEYWORD_4_19 in line or OOM_END_KEYWORD_5_10 in line:
                if OOM_END_KEYWORD_4_19 in line:
                    end_keyword = OOM_END_KEYWORD_4_19
                elif OOM_END_KEYWORD_5_10 in line:
                    end_keyword = OOM_END_KEYWORD_5_10
                else:
                    end_keyword = OOM_END_KEYWORD
                collecting = False
                after_keyword = line.split(end_keyword)[1]
                if end_keyword == OOM_END_KEYWORD_5_10:
                    task_name = "(" + after_keyword.split("task=")[1].split(",")[0] + ")"
                else:
                    task_name = after_keyword.split()[1].strip(',')
                record['task_name'] = task_name
                oom_result['task'][task_name] = oom_result['task'].get(task_name, 0) + 1

            # Both checks also run for the end line itself.
            if OOM_CGROUP_KEYWORD in line:
                note_cgroup(record, line.split('Task in')[1].split()[0])
            if OOM_END_KEYWORD_5_10 in line:
                note_cgroup(record, line.split('task_memcg=')[1].split(",")[0])

    except Exception as err:
        import traceback
        traceback.print_exc()
        sys.stderr.write("oom_dmesg_analyze failed {}\n".format(err))

def oom_read_dmesg(data, mode, filename):
    """Load the kernel log into ``data['dmesg']``.

    Args:
        data: dict that receives the stripped log text under ``'dmesg'``.
        mode: ``1`` runs ``dmesg -T`` and captures its output; ``2`` reads
            ``filename``.  Any other value leaves ``data`` untouched.
        filename: path of the log file; only used when ``mode`` is ``2``.
    """
    if mode == 1:
        pipe = os.popen('dmesg -T 2>/dev/null')
        live_log = pipe.read().strip()
        pipe.close()
        data['dmesg'] = live_log
        return
    if mode == 2:
        with open(filename, 'r') as log_file:
            data['dmesg'] = log_file.read().strip()

def oom_diagnose(sn, data, mode):
    """Run the OOM analysis on ``data['dmesg']`` and return the summary text.

    A fresh result dict is built and always stored in ``data['oom_result']``.
    When the log contains ``OOM_BEGIN_KEYWORD`` or a line matching
    ``pid_pattern`` it is split into records with ``oom_dmesg_analyze``; the
    record index chosen by ``oomcheck_get_spectime`` (falling back to the
    last record) and up to ``data['num'] - 1`` records before it are then
    analysed.  In JSON mode (``data['json'] == 1``) the collected per-record
    JSON plus the raw OOM fields of ``data`` are printed as one JSON object.

    Args:
        sn: accepted but not used.
        data: run configuration and inputs (``json``, ``spectime``,
            ``dmesg``, ``num``, ``raw_ooms``, ...); receives ``oom_result``.
        mode: stored in the result as ``mode``.

    Returns:
        The summary of the last analysed record, or ``""`` when nothing was
        analysed.  On any exception the traceback and a short message are
        printed and the summary collected so far is returned.
    """
    # Built before the try block so the except handler can always rely on
    # 'summary' being present, even when reading ``data`` fails.
    oom_result = {
        'task': {},
        'json': 0,
        'mode': mode,
        'summary': "",
        'oom_total_num': 0,
        'cgroup': {},
        'sub_msg': {},
        'last_time': {},
        'time': [],
        'spectime': 0,
        'max': {'rss':0,'task':"",'score':0,'pid':0},
        'max_total': {'rss':0,'task':"",'score':0,'cnt':0},
    }
    try:
        oom_result['json'] = data['json']
        oom_result['spectime'] = data['spectime']
        dmesgs = data['dmesg']
        if OOM_BEGIN_KEYWORD in dmesgs or  pid_pattern.search(dmesgs) is not None:
            submsg = ''
            oom_dmesg_analyze(dmesgs, oom_result)
            oom_result['summary'] += "total oom: %s\n"%oom_result['oom_total_num']

            last_oom = oom_result["oom_total_num"]
            num = oomcheck_get_spectime(oom_result['spectime'], oom_result)
            if num < 0 or num > last_oom:
                num = last_oom
            # Analyse data['num'] records ending at ``num``; record keys
            # start at 1.
            last_num = num-data['num']+1
            if last_num <= 0 :
                last_num = 1
            output_json = {}
            for i in range(last_num,num+1):
                oom_get_max_task(i, oom_result)
                submsg = oom_reason_analyze(i, oom_result, oom_result['summary'])
                output_json[str(oom_result['sub_msg'][i]['time'])] = oom_result['sub_msg'][i]['json']
                # Kept inside the loop so that these keys are only emitted
                # when at least one record was analysed.
                output_json["raw_ooms"] = data["raw_ooms"]
                output_json["task_container"] = data["task_container"]
                output_json["task_pod"] = data["task_pod"]
                output_json["oom_container"] = data["oom_container"]
                output_json["oom_pod"] = data["oom_pod"]
            if oom_result['json'] == 1:
                print(json.dumps(output_json, ensure_ascii=False))
            oom_result['summary'] = submsg
        data['oom_result'] = oom_result
        return oom_result['summary']

    except Exception as err:
        traceback.print_exc()
        print( "oom_diagnose failed {}".format(err))
        data['oom_result'] = oom_result
        return oom_result['summary']


def extract_oom_msg_by_time(data, dmesg, target_time, num):
    """Collect the raw text of each OOM report and pick the one nearest a time.

    ``dmesg`` is split into lines.  Every line containing
    ``OOM_BEGIN_KEYWORD`` opens a report keyed by the time parsed from that
    line (``-1`` when the line has no ``[``).  Lines are accumulated until
    ``is_oom_end`` recognises the end of the report; the joined text is then
    stored as ``raw_oom``.  Container and pod names resolved from the cgroup
    paths are stored next to it.  When ``data['pod']`` is not empty, only
    reports whose cgroup or parent cgroup belongs to that pod keep their raw
    text.

    The report with non-empty raw text whose time is closest to
    ``target_time`` is copied into ``data`` (``raw_ooms``,
    ``task_container``, ``task_pod``, ``oom_container``, ``oom_pod``); all
    five stay ``''`` when there is none.

    Args:
        data: run configuration (reads ``pod``) and output dict.
        dmesg: the whole log as one string.
        target_time: time to search around, in the unit returned by
            ``oom_time_to_normal_time``.
        num: accepted but not used.

    Returns:
        Dict mapping report start time to its collected fields.
    """
    def is_oom_end(line, idx, lines):
        """Return True when ``line`` is the last line of an OOM report."""
        if "Killed process" in line and "total-vm" in line:
            # End marker for 3.10 (host and cgroup) and for 4.19 host OOMs.
            if len(lines) > idx + 1 and "oom_reaper" in lines[idx + 1]:
                # 4.19 host: the real end marker is the next line.
                return False
            return True
        if "oom_reaper" in line:
            # 4.19 end marker.
            return True

        if "oom-kill:constraint" in line:
            # 5.10 end marker.
            if len(lines) > idx + 1 and "Out of memory" in lines[idx + 1]:
                # 5.10 host: the real end marker is the next line.
                return False
            # 5.10 cgroup end marker.
            return True

        if "Out of memory" in line and "total-vm" in line:
            # 5.10 host end marker.
            return True
        return False

    def is_target_pod(cgroup_name):
        return get_pod_by_cgroup(cgroup_name).get('Name') == data['pod']

    def record_owners(entry, cg, p_cg):
        # Resolve container/pod names for the task cgroup and the cgroup
        # returned alongside it by oom_get_cgroup_name_api.
        entry['task_container'] = get_con_by_cgroup(cg).get('ContainerName','')
        entry['task_pod'] = get_pod_by_cgroup(cg).get('Name','')
        entry['oom_container'] = get_con_by_cgroup(p_cg).get('ContainerName','')
        entry['oom_pod'] = get_pod_by_cgroup(p_cg).get('Name','')

    lines = dmesg.split("\n")
    res = []
    ooms = {}
    find_target_pod = False
    last_start_time = -1

    # NOTE(review): presumably 0 silences libpodinfo logging during the
    # lookups and 4 is the normal level -- confirm the level semantics.
    podinfo_lib.SetLogLevel(0)
    try:
        for idx, line in enumerate(lines):
            # Named line_time so the ``time`` module is not shadowed.
            if line.find('[') != -1:
                line_time = oom_time_to_normal_time(line.split('[')[1].split(']')[0])
            else:
                line_time = -1
            if OOM_BEGIN_KEYWORD in line:
                res = [line]
                last_start_time = line_time
                ooms[last_start_time] = {
                    "raw_oom": '',
                    "task_container": "",
                    "task_pod": "",
                    "oom_container": "",
                    "oom_pod": "",
                }
            elif is_oom_end(line, idx, lines):
                entry = ooms.get(last_start_time)
                if entry is None:
                    # End marker before any start marker (truncated log):
                    # there is no report to attach it to, so drop what was
                    # collected instead of failing with KeyError.
                    find_target_pod = False
                    res = []
                    continue
                if OOM_END_KEYWORD_5_10 in line:
                    cg, p_cg = oom_get_cgroup_name_api(line, is_510=1)
                    record_owners(entry, cg, p_cg)
                    if data['pod'] != '':
                        find_target_pod = is_target_pod(cg) or is_target_pod(p_cg)
                if len(res) > 0:
                    # In pod mode the raw text is only kept when the report
                    # belongs to the requested pod.
                    if data['pod'] == '' or find_target_pod:
                        res.append(line)
                        entry['raw_oom'] = '\n'.join(res)
                    # Reset the pod flag and the collected lines.
                    find_target_pod = False
                    res = []
            elif OOM_CGROUP_KEYWORD in line:
                entry = ooms.get(last_start_time)
                # Without an open report (truncated log) the line is only
                # collected; the lookups are skipped.
                if entry is not None:
                    cg, p_cg = oom_get_cgroup_name_api(line)
                    record_owners(entry, cg, p_cg)
                    if data['pod'] != '':
                        find_target_pod = is_target_pod(cg) or is_target_pod(p_cg)
                res.append(line)
            else:
                res.append(line)
    finally:
        # Restore the log level even when the scan fails.
        podinfo_lib.SetLogLevel(4)

    # Find the OOM report closest to target_time.
    min_span = float("inf")
    data['raw_ooms'] = ''
    data['task_container'] = ''
    data['task_pod'] = ''
    data['oom_container'] = ''
    data['oom_pod'] = ''
    for t in sorted(ooms.keys(), reverse=True):
        # Skip reports without raw text (never finished, or filtered out in
        # pod mode).
        if ooms[t]['raw_oom'] == '':
            continue
        if abs(t - target_time) < min_span:
            min_span = abs(t - target_time)
            data["raw_ooms"] = ooms[t]['raw_oom']
            data["task_container"] = ooms[t]['task_container']
            data["task_pod"] = ooms[t]['task_pod']
            data["oom_container"] = ooms[t]['oom_container']
            data["oom_pod"] = ooms[t]['oom_pod']
    return ooms

#
# mode = 1 for  live mode
# mode = 2 for file mode
def main():
    """Entry point: set defaults, parse options, read the log and analyse it."""
    # Defaults; get_opts() overrides them from the command line.
    data = {
        'mode': 1,
        'json': 0,
        'num': 1,
        'filename': '',
        'spectime': int(time.time()),
        'pod': '',
    }
    set_podinfo_lib()
    get_opts(data)
    oom_read_dmesg(data, data['mode'], data['filename'])
    extract_oom_msg_by_time(data, data['dmesg'], data['spectime'], data['num'])
    oom_diagnose('', data, data['mode'])


def usage():
    """Print the command-line help text to stdout."""
    help_text = """
            -h --help     print the help
            -f --dmesg file
            -l --live mode
            -t --time mode
            -j --output json
            -n --# of output results
            -p --pod (now only for console(raw ooms info))
           for example:
           sysak oomcheck.py
           sysak oomcheck.py -t "2021-09-13 15:32:22"
           sysak oomcheck.py -t 970665.476522
           sysak oomcheck.py -p podname
           sysak oomcheck.py -f oom_file.txt
           sysak oomcheck.py -f oom_file.txt -t 970665.476522
        """
    print(help_text)

def get_opts(data):
    """Parse ``sys.argv`` and store the recognised options in ``data``.

    ``-h`` prints the usage text and exits with status 0.  ``-f`` selects
    file mode (``mode`` 2) and stores the file name, ``-l`` selects live
    mode (``mode`` 1), ``-j`` enables JSON output, ``-n`` stores an integer
    in ``num``, ``-p`` stores the pod name, and ``-t`` stores a float in
    ``spectime``; a time value containing ``-`` is first converted with
    ``normal_time2ts``.  Errors from ``getopt`` and from the number
    conversions are not caught here.
    """
    short_opts = "jhlf:t:p:n:"
    long_opts = ["json","help","file=","live=","time=","pod=","num="]
    parsed, _ = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    for opt, arg in parsed:
        if opt in ("-h","--help"):
            usage()
            sys.exit(0)
        elif opt in ("-j","--json"):
            data['json'] = 1
        elif opt in ("-l","--live"):
            data['mode'] = 1
        elif opt in ("-f","--file"):
            data['mode'] = 2
            data['filename'] = arg
        elif opt in ("-p","--pod"):
            data['pod'] = arg
        elif opt in ("-n","--num"):
            data['num'] = int(arg)
        elif opt in ("-t","--time"):
            data['spectime'] = float(normal_time2ts(arg) if '-' in arg else arg)

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

