#!/usr/bin/env python3

"""
Command-line interface for installing and running hecatomb.

Michael Roach, Q2 2021
"""


import argparse
import sys
import os
import subprocess
import time
import yaml
import glob
from shutil import copyfile


### VERSION
# Directory that holds this script (symlinks resolved); the other module-level paths are
# built relative to it.
BINDIR = os.path.dirname(os.path.realpath(__file__))
# Read the first line of the VERSION file that sits one level above BINDIR. The context
# manager closes the handle even if the read raises. The line is kept exactly as read
# (readline() retains the line terminator when the file has one).
with open(os.path.join(BINDIR, '../', 'VERSION'), 'r') as vFH:
    VERSION = vFH.readline()


### PARSE COMMAND LINE ARGUMENTS
# Build the command-line parser. RawDescriptionHelpFormatter is used so the banner in the
# description and the example commands in the epilog are printed as laid out here rather
# than being re-wrapped by argparse.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=("\n"
                 "██╗  ██╗███████╗ ██████╗ █████╗ ████████╗ ██████╗ ███╗   ███╗██████╗ \n"
                 "██║  ██║██╔════╝██╔════╝██╔══██╗╚══██╔══╝██╔═══██╗████╗ ████║██╔══██╗\n"
                 "███████║█████╗  ██║     ███████║   ██║   ██║   ██║██╔████╔██║██████╔╝\n"
                 "██╔══██║██╔══╝  ██║     ██╔══██║   ██║   ██║   ██║██║╚██╔╝██║██╔══██╗\n"
                 "██║  ██║███████╗╚██████╗██║  ██║   ██║   ╚██████╔╝██║ ╚═╝ ██║██████╔╝\n"
                 "╚═╝  ╚═╝╚══════╝ ╚═════╝╚═╝  ╚═╝   ╚═╝    ╚═════╝ ╚═╝     ╚═╝╚═════╝ \n"
                 f"\nVersion {VERSION}\n"
                 "\n"
                 "Commands:\n"
                 "    install     Download and install the databases (only have to do once)\n"
                 "    run         Run the Hecatomb pipeline\n"
                 "    config      Copy the default configfile to the current directory\n"
                 "                (for use with --configfile)\n"
                 "    listHosts   List the available host genomes for contaminant removal\n"
                 "    addHost     Process and add a new host genome for contaminant removal\n"
                 "\n"),
    epilog= ("\n"
             "To Run Hecatomb:\n"
             "hecatomb run --reads fastq/\n"
             "\n"
             "Run Hecatomb and skip assembly:\n"
             "hecatomb run --reads fastq/ --skipAssembly\n"
             "\n"
             "Run test dataset (will take a few hours):\n"
             "hecatomb run --test \n"
             "\n"
             "To run hecatomb on a HPC cluster (e.g. slurm, sge, etc.):\n"
             # The 'run' command needs --reads (or --test), so the example includes it.
             "hecatomb run --reads fastq/ --profile slurm\n\n"
             "\n"
             "Complete documentation for Hecatomb is available at:\n"
             "https://hecatomb.readthedocs.io\n"
             )
)
# Required positional: which of the sub-commands listed in the description to execute.
parser.add_argument('command', choices=['install', 'run', 'config', 'listHosts', 'addHost'])
parser.add_argument('--reads', help=('Either directory containing seq data, or TSV specifying samples and seq files. '
                                     'See https://hecatomb.readthedocs.io/en/latest/usage/ for more info.'))
parser.add_argument('--configfile', help='Specify your own config file. First copy the template with "hecatomb config".')
parser.add_argument('--test', help='Use the test dataset (ignores --reads)', action='store_true')
parser.add_argument('--skipAssembly', help="Don't perform an assembly", action='store_true', default=False)
parser.add_argument('--results', help='Directory to write the output files', default='hecatomb_out')
parser.add_argument('--host', help='Host name for contaminant removal', default='human')
parser.add_argument('--hostfa', help='Host reference (for addHost command only)')
parser.add_argument('--profile', help='Snakemake profile for use on cluster')
# Kept as a string: the value is only ever interpolated into the Snakemake command line.
parser.add_argument('--threads', help='Number of threads to use (ignored if using --profile)', default='32')
parser.add_argument('--fast', help='Use faster but less sensitive settings for MMSeqs searches', action='store_true')
# May be given several times; each value is collected into a list (None when never given).
parser.add_argument('--snake',
                    help='Pass one or more commands to Snakemake e.g. --snake=--dry-run',
                    action='append')
# Parsed once at module level; the functions below read the result through this global.
args = parser.parse_args()


### GLOBAL VARIABLES
# Normalised locations of the Snakemake workflow directory, the test data directory and
# the conda directory inside the workflow, all resolved from the parent of BINDIR.
_parentDir = os.path.join(BINDIR, '../')
SNEKDIR = os.path.normpath(os.path.join(_parentDir, 'snakemake', 'workflow'))
TESTDIR = os.path.normpath(os.path.join(_parentDir, 'test_data'))
CONDDIR = os.path.normpath(os.path.join(SNEKDIR, 'conda'))


def readConfig():
    """
    Load the pipeline configuration from YAML.

    The file passed with --configfile is used when one was given; otherwise the
    config.yaml under snakemake/config (relative to the script directory) is read.
    :return: the parsed config, as produced by yaml.safe_load()
    """
    bundledConfig = os.path.join(BINDIR, '../', 'snakemake', 'config', 'config.yaml')
    chosenConfig = args.configfile if args.configfile else bundledConfig
    with open(chosenConfig, 'r') as handle:
        return yaml.safe_load(handle)


def initialiseSnakeCommand():
    """
    Assemble the leading part of the Snakemake command line shared by all workflows.

    With --profile, the profile name plus --default-resources (values taken from the config
    file) are used; otherwise -j with the --threads value. The fixed flags, the conda prefix,
    an optional --configfile and any --snake pass-through arguments follow.
    :return command: string for use with subprocess.run()
    """
    if args.profile:
        cfg = readConfig()
        resources = ' '.join([
            f'mem_mb={cfg["defaultMem"]}',
            f'time={cfg["defaultTime"]}',
            f'jobs={cfg["defaultJobs"]}',
        ])
        jobArgs = f'--profile {args.profile} --default-resources {resources} '
    else:
        jobArgs = f'-j {args.threads}'
    # Flags that are always passed, regardless of how jobs are scheduled
    staticFlags = ' '.join([
        '--use-conda',
        '--conda-frontend mamba',
        '--rerun-incomplete',
        '--printshellcmds',
        '--nolock',
        '--show-failed-logs',
        f'--conda-prefix {CONDDIR}',
    ])
    pieces = [f'snakemake {jobArgs} {staticFlags} ']
    # Forward the user's own config file, if any
    if args.configfile:
        pieces.append(f' --configfile {args.configfile} ')
    # Extra arguments handed straight through to Snakemake
    if args.snake:
        pieces.append(' '.join(args.snake))
    return ''.join(pieces)


def runSnakeSys(command):
    """
    Run a snakemake command and abort the program if it does not succeed.

    The command is echoed to stderr, split on whitespace, and executed without a shell.

    :param command: The snakemake command to run, as a single whitespace-separated string
    :return: none; the program exits with status 1 if the command returns a non-zero code
    """
    sys.stderr.write(f'Running snakemake command:\n{command}\n')
    # NOTE: plain whitespace splitting means an argument that itself contains spaces
    # (e.g. a path) is passed to the child process as several arguments.
    if subprocess.run(command.split()).returncode != 0:
        # sys.exit instead of the exit() builtin: the latter is injected by the site module
        # for interactive use and is not guaranteed to exist in every interpreter setup.
        sys.exit(1)
    return None


# Functions for installing and running hecatomb
def install():
    """
    Run the DownloadDB.smk Snakemake workflow, which checks for and downloads the database files.
    The --results value is passed to the workflow as the Output config entry.
    :return:
    """
    baseCommand = initialiseSnakeCommand()
    sys.stderr.write("Checking and downloading database files\n")
    workflow = os.path.normpath(os.path.join(SNEKDIR, 'DownloadDB.smk'))
    runSnakeSys(' '.join([baseCommand, '-s', workflow, '-C', f'Output={args.results}']))
    return None


def run():
    """
    The run function. This will run the hecatomb snakemake pipeline itself.

    Reads are taken from the test data directory when --test is given, otherwise from
    --reads. The Hecatomb.smk workflow is run first; the same command is then run again
    with --report appended to generate the run report.
    :return: None; exits with status 1 when neither --test nor --reads was supplied
    """
    if args.test:
        readDir = TESTDIR
    elif not args.reads:
        sys.stderr.write('ERROR: please specify the directory or TSV file of reads with --reads\n')
        # sys.exit rather than the site-provided exit() builtin, which is meant for
        # interactive sessions and may be unavailable in some interpreter setups.
        sys.exit(1)
    else:
        readDir = args.reads
    snakeCommand = initialiseSnakeCommand()
    sys.stderr.write("Running Hecatomb\n")
    snekFile = os.path.normpath(os.path.join(SNEKDIR, 'Hecatomb.smk'))
    # Everything after -C is a KEY=VALUE config override handed to the workflow
    runCommand = (f'{snakeCommand} '
                  f'-s {snekFile} '
                  f'-C Reads={readDir} '
                    f'Host={args.host} '
                    f'Output={args.results} '
                    f'SkipAssembly={args.skipAssembly} '
                    f'Fast={args.fast} ')
    runSnakeSys(runCommand)
    # Second invocation: identical command plus --report
    sys.stderr.write("Generating run report\n")
    runCommand = runCommand + ' --report'
    runSnakeSys(runCommand)
    return None


def dumpConfig():
    """
    Place a copy of the template config file in the working directory as
    'hecatomb.config.yaml'. An existing file of that name is never overwritten:
    an error is written to stderr and the program exits with status 1 instead.
    :return:
    """
    destination = 'hecatomb.config.yaml'
    if os.path.isfile(destination):
        sys.stderr.write("\n"
                         "Error: Output file 'hecatomb.config.yaml' already exists.\n"
                         "\n")
        sys.exit(1)
    source = os.path.join(BINDIR, '../', 'snakemake', 'config', 'config.yaml')
    copyfile(source, destination)
    return None


def addHost():
    """
    Add a new host to the Hecatomb database by running the AddHost.smk workflow.

    Requires --hostfa. When --host still has the value 'human', the user is prompted for
    a host name on stderr; an empty answer keeps 'human'.
    :return: None; exits with status 1 when --hostfa was not supplied
    """
    if not args.hostfa:
        sys.stderr.write('ERROR: Host fasta file required when adding new host genome\n')
        # sys.exit rather than the site-provided exit() builtin, which is meant for
        # interactive sessions and may be unavailable in some interpreter setups.
        sys.exit(1)
    sys.stderr.write('Adding new virus-masked host for use with hecatomb\n')
    if args.host == 'human':
        # NOTE(review): 'human' is presumably treated as "no host name chosen" here;
        # an explicit --host human cannot be told apart and also triggers the prompt.
        sys.stderr.write(f'Enter host name [{args.host}]:')
        host = input()
        host = host.rstrip()
        if host == '':
            host = args.host
    else:
        host = args.host
    snakeCommand = initialiseSnakeCommand()
    snekFile = os.path.normpath(os.path.join(SNEKDIR, 'AddHost.smk'))
    runCommand = f'{snakeCommand} -s {snekFile} -C HostFa={args.hostfa} HostName={host} Output={args.results}'
    runSnakeSys(runCommand)
    return None


def listHosts():
    """
    Write the names of the entries found in the database 'host' directory to stdout.

    The database directory is the config's 'Databases' value, or '../databases' relative
    to the script directory when that value is None. 'virus_shred.fasta.gz' is not listed.
    :return:
    """
    cfg = readConfig()
    # Fall back to the bundled location when no database directory is configured
    dbRoot = cfg['Databases']
    if dbRoot is None:
        dbRoot = os.path.join(BINDIR, '../databases')
    sys.stdout.write('Available host genomes:\n')
    for entry in glob.glob(os.path.join(dbRoot, "host", "*")):
        name = os.path.basename(entry)
        if name == 'virus_shred.fasta.gz':
            continue
        sys.stdout.write(f'{name}\n')
    sys.stdout.write('\n')
    return None


# Dispatch table: each accepted value of the 'command' argument maps to the function
# that implements it; the requested one is called immediately.
commands = {'install':install, 'run':run, 'config':dumpConfig, 'addHost':addHost, 'listHosts':listHosts}
commands[args.command]()

# NOTE(review): the reason for the one-second pause before exiting is not evident here.
time.sleep(1)
# sys.exit rather than the exit() builtin, which the site module provides for interactive
# use and which is not guaranteed to exist in every interpreter setup.
sys.exit(0)

