#!/usr/bin/env python
# vim: tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80 smarttab expandtab

"""
* Copyright (C) 2012  Sangoma Technologies Corp.
* All Rights Reserved.
*
* Author(s)
* Moises Silva <moy@sangoma.com>
*
* This code is Sangoma Technologies Confidential Property.
* Use of and access to this code is covered by a previously executed
* non-disclosure agreement between Sangoma Technologies and the Recipient.
* This code is being supplied for evaluation purposes only and is not to be
* used for any other purpose.

Sangoma Core Collector

Read the core dump from stdin and store it at /var/core
then launch gdb and execute 'gdb <proc-path> /var/core/xxx'

Accept --path as the path to store the core
Accept --pid as the pid of the process (to look for additional info in /proc/<pid>)

Note this script can also be run manually (not as a result of a core dump), in which case
you must provide the path to an existing core dump and the --exe option to the executable

** Be extremely careful when performing modifications to this script, it is easy
   to screw up something and fail to store the core properly!
"""

import sys
import os
import shutil
import tempfile
import time
import signal
import subprocess
import logging
import string
from prod_def import *
from optparse import OptionParser
from logging.handlers import SysLogHandler

class CoreLogger:
    """Namespace for the collector's shared logger.

    The attributes are created once, when this class body runs: a formatter,
    a handler bound to the local syslog socket at /dev/log, and the
    'sng-core-collector' logger (level INFO) that the handler is attached to.
    The rest of the script logs through CoreLogger.logger.
    """

    # Output side: one line format, emitted through the /dev/log syslog socket.
    formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(process)d] [%(levelname)s] %(message)s')
    syslog_handler = SysLogHandler('/dev/log')
    syslog_handler.setFormatter(formatter)

    # Logger side: named logger at INFO level, wired to the syslog handler.
    logger = logging.getLogger('sng-core-collector')
    logger.setLevel(logging.INFO)
    logger.addHandler(syslog_handler)

class CoreConfig:
    """Collector settings read from a simple KEY=VALUE configuration file.

    All state lives on the class itself (the methods are classmethods), so the
    settings are shared by the whole process. Values are kept as strings.
    """

    # Location used by load() when no explicit path is given.
    CONF_FILE_PATH = "/usr/local/sng/conf/sng-core-collector.conf"

    # Parsed settings: option name -> string value.
    options = {}

    @classmethod
    def load(cls, conf_file_path=None):
        """Parse the configuration file into CoreConfig.options.

        Each useful line has the form NAME=VALUE; whitespace around the name
        and value is dropped and surrounding double quotes are removed from
        the value. Blank lines and lines starting with '#' are skipped, and a
        line without '=' is logged and ignored instead of aborting the load.

        conf_file_path -- file to read; defaults to CONF_FILE_PATH.

        Returns True when the file was read, False when it does not exist or
        cannot be read.
        """
        if conf_file_path is None:
            conf_file_path = cls.CONF_FILE_PATH

        if not os.path.exists(conf_file_path):
            return False

        try:
            with open(conf_file_path) as conf_file:
                conf_lines = conf_file.readlines()
        except (IOError, OSError):
            CoreLogger.logger.error("Could not open configuration file for reading, skipping...")
            return False

        for line in conf_lines:
            entry = line.strip()
            if not entry or entry.startswith('#'):
                continue

            # Split on the first '=' only, so values may themselves contain '='
            name, separator, var = entry.partition('=')
            if not separator:
                CoreLogger.logger.warning("Ignoring malformed configuration line: %s" % entry)
                continue

            cls.options[name.strip()] = var.strip().strip('"')

        return True

    @classmethod
    def backtrace_max_time(cls):
        """Return BACKTRACE_MAX_TIME as an int number of seconds.

        Falls back to 120 seconds (and logs an error) when the option is
        missing or is not a valid integer.
        """
        default_time = 120 # 2 minutes default

        try:
            return int(cls.options.get("BACKTRACE_MAX_TIME", default_time))
        except (TypeError, ValueError):
            CoreLogger.logger.error("Invalid integer value for max backtrace time, using default value (%d)." % default_time)

        return default_time

    @classmethod
    def check_svc_to_restart(cls, svc_name):
        """Tell whether svc_name is listed in the RESTART_SERVICES option.

        RESTART_SERVICES is a comma separated list; svc_name matches an entry
        when it is contained in it (substring match, as before). Returns False
        when the option is not configured or svc_name is empty.
        """
        if not svc_name:
            # An empty name is a substring of everything; never treat it as a match
            return False

        # A missing option (e.g. no configuration file) means nothing to restart
        configured = cls.options.get("RESTART_SERVICES") or ""
        return any(svc_name in entry for entry in configured.split(','))


def get_max_core_size(path):
    """Return the number of bytes available to unprivileged users on the
    filesystem that holds path.

    path -- any existing path on the target filesystem; an empty string (what
            os.path.dirname() gives for a bare file name) means the current
            directory.

    Raises OSError if path cannot be queried.
    """
    stats = os.statvfs(path or '.')

    # f_bavail is counted in fragments (f_frsize); f_bsize is only the
    # preferred I/O block size and may differ. Fall back to f_bsize if the
    # platform leaves f_frsize unset.
    unit = stats.f_frsize or stats.f_bsize
    return stats.f_bavail * unit

def get_svc_name_from_path(path):
    """Return the service name embedded in a core file path.

    The file name (last '/'-separated component of path) is split on '.', and
    its second field is taken as the service name.

    Raises IndexError when the file name contains no '.'.
    """
    file_name = path.rsplit('/', 1)[-1]
    fields = file_name.split('.')

    # fields[0] is the leading part of the name, fields[1] the service
    return fields[1]

def get_pid_file_name(pid):
    """Find the pid file under <product base dir>/run/ that records pid.

    A file matches when its first line, stripped of whitespace, equals pid.

    pid -- process id to look for (compared as a string).

    Returns the full path of the matching file, or None when there is no
    match or the run directory cannot be listed. Entries that are not regular
    files, or that cannot be read, are skipped.
    """
    # Especially for service nsc, we need to remove freeswitch.pid

    prod_def = ProdDef()
    service_run_path = prod_def.get_base_dir() + "/run/"
    wanted = str(pid)

    try:
        entries = os.listdir(service_run_path)
    except OSError:
        # No run directory (or no access to it): nothing to report
        return None

    for entry in entries:
        complete_file_name = service_run_path + entry

        # Sub-directories, sockets, etc. cannot be pid files
        if not os.path.isfile(complete_file_name):
            continue

        try:
            with open(complete_file_name, 'r') as pid_file:
                first_line = pid_file.readline()
        except (IOError, OSError, ValueError):
            # Unreadable or undecodable entry; keep looking at the others
            continue

        if wanted == first_line.strip():
            return complete_file_name

    return None


# Settings applied to the detached process set up by create_daemon().
UMASK = 0       # file mode creation mask handed to os.umask()
WORKDIR = "/"   # working directory handed to os.chdir()
MAXFD = 1024    # default upper bound on file descriptors

# Where the daemon's standard descriptors get pointed: the platform's null
# device name when the os module exposes one, else the usual Unix path.
REDIRECT_TO = getattr(os, "devnull", "/dev/null")

def execute_watchdog(pid, watchdog):
    """Wait for child pid to finish, for at most watchdog one-second polls.

    pid      -- child process to poll with a non-blocking waitpid
    watchdog -- number of polls (about one second apart) still allowed

    Returns the budget left when the child finished in time. If the budget
    runs out, the child is sent SIGKILL, a warning is logged and the calling
    process exits with status 1.
    """
    still_running = (0, 0)
    remaining = watchdog

    while remaining > 0:
        if os.waitpid(pid, os.WNOHANG) != still_running:
            # Child state changed (it has been reaped): stop polling
            break
        time.sleep(1)
        remaining = remaining - 1

    if remaining > 0:
        return remaining

    # Budget exhausted (or none was given): terminate the child and give up
    os.kill(pid, signal.SIGKILL)
    CoreLogger.logger.warning("Timeout reached, watchdog killing pid %d." % pid)
    sys.exit(1)

def _fork_or_raise():
    """Fork and return os.fork()'s result, turning OSError into the plain
    Exception ("<strerror> [<errno>]") that create_daemon has always raised."""
    try:
        return os.fork()
    except OSError as e:
        raise Exception("%s [%d]" % (e.strerror, e.errno))


def _run_daemon_command(cmd, watchdog, nulldev):
    """Run one command from inside the daemon; return the watchdog budget left.

    Without a watchdog the command is run synchronously with its output sent
    to nulldev. With one, it is exec'ed in a forked child that is supervised
    by execute_watchdog(). A cmd of None is a no-op.
    """
    if cmd is None:
        return watchdog

    if watchdog is None:
        subprocess.call(cmd, stdout=nulldev, stderr=nulldev)
        return watchdog

    pid = _fork_or_raise()
    if pid > 0:
        return execute_watchdog(pid, watchdog)

    try:
        os.execv(cmd[0], cmd)
    finally:
        # Only reached when execv failed: leave right away so this child does
        # not carry on running the daemon's (and the caller's) code.
        os._exit(127)


def create_daemon(cmd1, cmd2, watchdog):
    """Run cmd1, then cmd2, in a detached (double-forked) daemon process.

    cmd1, cmd2 -- argv lists (cmd[0] is the executable path), or None to skip
    watchdog   -- total time budget for the commands, in execute_watchdog()
                  polls of about one second; None runs the commands without
                  supervision

    Returns 1 in the calling process as soon as the first fork succeeded; the
    daemon itself never returns (it ends with os._exit(0)).

    Raises Exception when a fork fails.
    """
    if _fork_or_raise() != 0:
        # for parent process, return 1
        return 1

    # First child: become a session leader, then fork again and exit so the
    # grandchild is left without a controlling terminal or a waiting parent.
    os.setsid()
    if _fork_or_raise() != 0:
        os._exit(0)

    os.chdir(WORKDIR)
    os.umask(UMASK)

    try:
        maxfd = os.sysconf("SC_OPEN_MAX")
    except (AttributeError, ValueError):
        maxfd = 1024
    if maxfd <= 0:
        # sysconf reports an indeterminate limit as -1
        maxfd = 1024

    # Drop every inherited descriptor; errors on unused slots are ignored
    os.closerange(0, maxfd)

    # The lowest free descriptor is now 0, so this open becomes stdin;
    # stdout and stderr are then made copies of it.
    os.open(REDIRECT_TO, os.O_RDWR)
    os.dup2(0, 1)
    os.dup2(0, 2)

    nulldev = open(os.devnull, 'w')

    # The budget left over by cmd1 is what cmd2 gets to run with
    watchdog = _run_daemon_command(cmd1, watchdog, nulldev)
    _run_daemon_command(cmd2, watchdog, nulldev)

    os._exit(0)

def main():
    """Store (or reuse) a core dump, optionally restart the crashed service,
    then generate a gdb backtrace in the background.

    Command line options:
      --path  where the core is (or is to be) stored; required
      --pid   pid of the crashed process; required when --path does not exist
              yet, in which case the core is read from stdin
      --exe   executable that produced the core; required when --path exists

    The backtrace is written by gdb to '<path>.bt'. Exits with status 1 on
    invalid arguments and 0 otherwise.
    """
    exe = None
    parser = OptionParser()

    parser.add_option('', '--path',
            dest='path',
            help='core path location',
            metavar='CORE_PATH')

    parser.add_option('', '--pid',
            dest='pid',
            help='PID of the defunct process',
            metavar='PID')

    parser.add_option('', '--exe',
            dest='exe',
            help='Path to the core dump executable',
            metavar='EXE_PATH')

    (options, args) = parser.parse_args()

    if options.path is None:
        sys.stderr.write('--path is required\n')
        sys.exit(1)

    # True while a backtrace can still be generated (core stored, exe known)
    valid_core = True

    # We accept either new paths or an existing path
    # if a non-existing path is given, we assume the core dump
    # binary data is in stdin and write it to the location pointed
    # by --path
    if os.path.exists(options.path):
        if options.exe is None:
            CoreLogger.logger.error('--exe is required when the core file already exists\n')
            sys.exit(1)

        exe = options.exe
        if not os.path.isfile(options.path):
            CoreLogger.logger.error('--path already exists and is not a file!\n')
            sys.exit(1)

        CoreLogger.logger.info('Using existing core file %s' % (options.path))
    else:
        if options.pid is None:
            CoreLogger.logger.error('--pid is required when the core given in --path does not already exist\n')
            sys.exit(1)

        CoreLogger.logger.info('Storing core file %s from PID %s' % (options.path, options.pid))

        # A core is raw binary data: read bytes (sys.stdin.buffer where it
        # exists) and write them untouched in binary mode.
        src = getattr(sys.stdin, 'buffer', sys.stdin)
        blocksize = 4096
        # dirname() is '' for a bare file name; that means the current directory
        sizelimit = get_max_core_size(os.path.dirname(options.path) or '.')

        with open(options.path, 'wb') as dst:
            while sizelimit > 0:
                data = src.read(blocksize)
                if len(data) == 0:
                    break
                sizelimit = sizelimit - len(data)
                dst.write(data)

        if sizelimit <= 0:
            CoreLogger.logger.error('Reached size limit for core dump, removing truncated core.')
            os.unlink(options.path)
            valid_core = False
        else:
            try:
                exe = os.readlink('/proc/%s/exe' % (options.pid))
            except OSError:
                # The core stays on disk, but gdb cannot be run without the
                # executable; carry on so the service can still be restarted.
                CoreLogger.logger.error('Could not resolve the executable of PID %s, skipping backtrace.' % (options.pid))
                valid_core = False
            else:
                CoreLogger.logger.info('Executable is %s' % exe)

    cmd_file = ''

    if valid_core:
        # gdb command script: log a full backtrace of all threads to <path>.bt
        tmpfd, cmd_file = tempfile.mkstemp()
        with os.fdopen(tmpfd, 'w+') as f:
            f.write('set logging file %s\n' % (options.path + '.bt'))
            f.write('set logging on\n')
            f.write('thread apply all bt full\n')
            f.write('quit\n')

    # load settings now, as we will need them below
    CoreConfig.load()

    # Before we generate backtrace file, check if the service needs to be
    # restarted or not
    try:
        service_name = get_svc_name_from_path(options.path)
    except IndexError:
        # Core file name without a service field: nothing to restart, but the
        # backtrace below must still be produced.
        CoreLogger.logger.warning('Could not determine the service name from %s, skipping service restart.' % (options.path))
        service_name = None

    if options.pid is not None:
        service_run_name = get_pid_file_name(options.pid)
    else:
        service_run_name = None

    retcode = -1
    if service_name and CoreConfig.check_svc_to_restart(service_name):
        cmd = ['/usr/local/sng/scripts/sng-reset-service', service_name]
        with open(os.devnull, 'w') as nulldev:
            subprocess.call(cmd, stdout=nulldev, stderr=nulldev)
        if service_run_name is not None and os.path.exists(service_run_name):
            os.remove(service_run_name)
        cmd = ['/sbin/service', service_name, "recover"]
        retcode = create_daemon(cmd, None, None)

    if retcode != 0 and valid_core:
        # Now that we have the core dump and an exe, we can generate the full gdb backtrace
        cmd1 = ['/bin/nice', '-n', '19', '/usr/bin/gdb', exe, options.path, '-x', cmd_file, '-batch']
        cmd2 = ['/bin/rm', '-f', cmd_file]
        create_daemon(cmd1, cmd2, CoreConfig.backtrace_max_time())

    sys.exit(0)

# Run the collector only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

