//
// Subworkflow with functionality specific to the nf-core/createtaxdb pipeline
//

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { UTILS_NFSCHEMA_PLUGIN   } from '../../nf-core/utils_nfschema_plugin'
include { paramsSummaryMap        } from 'plugin/nf-schema'
include { samplesheetToList       } from 'plugin/nf-schema'
include { completionEmail         } from '../../nf-core/utils_nfcore_pipeline'
include { completionSummary       } from '../../nf-core/utils_nfcore_pipeline'
include { imNotification          } from '../../nf-core/utils_nfcore_pipeline'
include { UTILS_NFCORE_PIPELINE   } from '../../nf-core/utils_nfcore_pipeline'
include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    SUBWORKFLOW TO INITIALISE PIPELINE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow PIPELINE_INITIALISATION {
    take:
    version           // boolean: print the pipeline version and exit
    validate_params   // boolean: validate parameters against the schema at runtime
    monochrome_logs   // boolean: disable coloured log output
    nextflow_cli_args //   array: positional nextflow CLI arguments
    outdir            //  string: directory where results will be saved
    input             //  string: path to the input samplesheet

    main:

    // Runs the shared nf-core start-up subworkflows, applies the pipeline-specific
    // parameter checks, loads the samplesheet into a channel and fails the run when
    // two DNA (or two AA) FASTA entries share the same file name.
    // NOTE(review): `monochrome_logs` and `input` are not referenced below; the
    // samplesheet path is taken from `params.input` directly.

    ch_versions = Channel.empty()

    //
    // Version printing / parameter dump (second argument enables the JSON dump)
    //
    def conda_like_profiles = ['conda', 'mamba']
    def conda_in_use = workflow.profile.tokenize(',').intersect(conda_like_profiles).size() >= 1

    UTILS_NEXTFLOW_PIPELINE(version, true, outdir, conda_in_use)

    //
    // Schema-based parameter validation and parameter summary
    //
    UTILS_NFSCHEMA_PLUGIN(workflow, validate_params, null)

    //
    // Checks on the configuration supplied to the pipeline
    //
    UTILS_NFCORE_PIPELINE(nextflow_cli_args)

    // IMPORTANT: DO NOT REMOVE THE validateInputParameters() CALL EVEN IF TEMPLATE SYNC TRIES TO DO IT!
    // Context: the template drops it because `igenomes` is skipped, but this pipeline relies on it
    // for its own custom validation.

    //
    // Pipeline-specific parameter validation
    //
    validateInputParameters()

    //
    // Samplesheet rows -> channel of [meta, fasta_dna, fasta_aa] entries
    //
    def samplesheet_rows = samplesheetToList(params.input, "assets/schema_input.json")
    ch_samplesheet = Channel.fromList(samplesheet_rows)

    // DNA FASTAs: names must be unique once a trailing `.gz` has been dropped
    ch_samplesheet
        .filter { _meta, dna_file, _aa_file -> dna_file }
        .map { _meta, dna_file, _aa_file ->
            // strip one extension only when the file is gzipped
            def extensions_to_strip = dna_file.name.endsWith('.gz') ? 1 : 0
            dna_file.getBaseName(extensions_to_strip)
        }
        .collect()
        .map { dna_names ->
            def distinct_dna = dna_names.sort().unique(false).size()
            if (distinct_dna < dna_names.size()) {
                // duplicate detection from https://stackoverflow.com/a/35922565
                def repeated_dna = dna_names.countBy { it }.grep { it.value > 1 }.collect { it.key }
                error("[nf-core/createtaxdb] ERROR: All DNA FASTA filenames (also after decompressing!) must be unique! Check for filename(s) starting with: ${repeated_dna.join(', ')}")
            }
        }

    // AA FASTAs: same uniqueness rule as for the DNA files
    ch_samplesheet
        .filter { _meta, _dna_file, aa_file -> aa_file }
        .map { _meta, _dna_file, aa_file ->
            // strip one extension only when the file is gzipped
            def extensions_to_strip = aa_file.name.endsWith('.gz') ? 1 : 0
            aa_file.getBaseName(extensions_to_strip)
        }
        .collect()
        .map { aa_names ->
            def distinct_aa = aa_names.sort().unique(false).size()
            if (distinct_aa < aa_names.size()) {
                // duplicate detection from https://stackoverflow.com/a/35922565
                def repeated_aa = aa_names.countBy { it }.grep { it.value > 1 }.collect { it.key }
                error("[nf-core/createtaxdb] ERROR: All AA FASTA filenames (also after decompressing!) must be unique! Check for filename(s) starting with: ${repeated_aa.join(', ')}")
            }
        }

    emit:
    samplesheet = ch_samplesheet
    versions    = ch_versions
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    SUBWORKFLOW FOR PIPELINE COMPLETION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow PIPELINE_COMPLETION {
    take:
    email           //  string: address for the completion email
    email_on_fail   //  string: address used when the pipeline fails
    plaintext_email // boolean: send plain-text instead of HTML email
    outdir          //    path: output directory where results are published
    monochrome_logs // boolean: disable ANSI colour codes in log output
    hook_url        //  string: hook URL for notifications
    multiqc_report  //  string: Path to MultiQC report (`.toList()` is called on it below)

    main:
    // Registers the handlers that run once the pipeline finishes: an optional
    // completion email, the console summary and an optional hook notification,
    // plus an error handler pointing users at the troubleshooting docs.
    def run_summary = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
    def report_list = multiqc_report.toList()

    //
    // Completion email and summary
    //
    workflow.onComplete {
        // an email goes out when either address has been supplied
        def email_requested = email || email_on_fail
        if (email_requested) {
            completionEmail(
                run_summary,
                email,
                email_on_fail,
                plaintext_email,
                outdir,
                monochrome_logs,
                report_list.getVal()
            )
        }

        completionSummary(monochrome_logs)

        if (hook_url) {
            imNotification(run_summary, hook_url)
        }
    }

    workflow.onError {
        log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting")
    }
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// Check and validate pipeline parameters
//
def validateInputParameters() {
    // Cross-checks the `--build_*` switches against the auxiliary files and options
    // each tool needs, and checks the downstream-samplesheet options for consistency.
    // Reads everything from the global `params`; calls `error()` with an explanatory
    // message on the first violated rule and returns nothing otherwise.

    // Validate CENTRIFUGE auxiliary file combinations
    if (params.build_centrifuge && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) {
        error('[nf-core/createtaxdb] Supplied --build_centrifuge, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for CENTRIFUGE)')
    }

    // Validate DIAMOND auxiliary file combinations
    if (params.build_diamond && [!params.prot2taxid, !params.nodesdmp, !params.namesdmp].any()) {
        error('[nf-core/createtaxdb] Supplied --build_diamond, but missing at least one of: --prot2taxid, --nodesdmp, or --namesdmp (all are mandatory for DIAMOND)')
    }

    // Validate GANON parameter combinations
    if (params.build_ganon && [!params.nodesdmp, !params.namesdmp].any()) {
        error('[nf-core/createtaxdb] Supplied --build_ganon, but missing at least one of: --nodesdmp, or --namesdmp (all are mandatory for GANON)')
    }

    // Validate BRACKEN/KRAKEN parameter combinations
    if ((params.build_bracken || params.build_kraken2) && [!params.accession2taxid, !params.nodesdmp, !params.namesdmp].any()) {
        error('[nf-core/createtaxdb] Supplied --build_kraken2 or --build_bracken, but missing at least one of: --accession2taxid, --nodesdmp, or --namesdmp (all are mandatory for BRACKEN/KRAKEN2)')
    }

    // Validate KRAKENUNIQ auxiliary file combinations
    if (params.build_krakenuniq && [!params.nucl2taxid, !params.nodesdmp, !params.namesdmp].any()) {
        error('[nf-core/createtaxdb] Supplied --build_krakenuniq, but missing at least one of: --nucl2taxid, --nodesdmp, or --namesdmp (all are mandatory for KRAKENUNIQ)')
    }

    // Validate MALT auxiliary file combinations
    // (gated on --build_malt: the mapping DB is a MALT requirement, not a KrakenUniq one)
    if (params.build_malt && !params.malt_mapdb) {
        error('[nf-core/createtaxdb] Supplied --build_malt, but missing: --malt_mapdb (all are mandatory for MALT)')
    }

    // Safe navigation: an unset --malt_build_options should produce the message below, not a NullPointerException
    def malt_has_sequence_type = params.malt_build_options?.contains('--sequenceType DNA') || params.malt_build_options?.contains('--sequenceType Protein')
    if (params.build_malt && !malt_has_sequence_type) {
        error('[nf-core/createtaxdb] Supplied --build_malt, but --malt_build_options must contain at a minimum malt-build parameters --sequenceType DNA or --sequenceType Protein')
    }

    // Validate samplesheet generation parameters
    if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) {
        error('[nf-core/createtaxdb] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets`! Check input.')
    }

    if (params.generate_downstream_samplesheets && !params.generate_tar_archive && params.generate_samplesheet_dbtype == 'tar') {
        error('[nf-core/createtaxdb] Supplied --generate_downstream_samplesheets with --generate_samplesheet_dbtype tar, but missing --generate_tar_archive (mandatory for tar archive output).')
    }
}

//
// Validate channels from input samplesheet
//
def validateInputSamplesheet(input) {
    // Checks one grouped samplesheet entry and condenses it.
    // `input` is indexed as a list: element 1 holds the meta maps of all runs of a
    // sample, element 2 the matching reads.
    // Returns [first meta map, reads]; calls `error()` when the runs do not all
    // share the same `single_end` value.
    def (metas, fastqs) = input[1..2]

    // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
    // size() is called as a method: property-style `.size` on a List goes through Groovy's
    // property lookup instead of invoking List.size().
    def endedness_values = metas.collect { meta -> meta.single_end }.unique()
    if (endedness_values.size() != 1) {
        error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
    }

    return [metas[0], fastqs]
}
//
// Generate methods description for MultiQC
//
def toolCitationText() {
    // Builds the in-text citation sentence for the MultiQC methods description.
    // One citation is included per enabled `params.build_*` switch; MultiQC is always cited.
    // Returns a single sentence ending in a period.

    // Citations of tools that were not requested are dropped rather than kept as empty
    // strings, so joining does not leave runs of blanks in the sentence.
    def tool_citations = [
        params.build_bracken ? "Bracken (Lu et al. 2017)," : "",
        params.build_centrifuge ? "Centrifuge (Kim et al. 2016)," : "",
        params.build_ganon ? "ganon (Piro et al. 2020)," : "",
        params.build_diamond ? "DIAMOND (Buchfink et al. 2015)," : "",
        params.build_kaiju ? "SeqKit (Shen et al. 2024) and Kaiju (Menzel et al. 2016)," : "",
        params.build_kmcp ? "KMCP (Shen et al. 2023)," : "",
        params.build_kraken2 ? "Kraken2 (Wood et al. 2019)," : "",
        params.build_krakenuniq ? "KrakenUniq (Breitwieser et al. 2018)," : "",
        params.build_malt ? "MALT (Vågene et al. 2018)," : "",
    ].findAll { citation -> citation }

    def sentence_parts = ["Tools used in the workflow included:"] + tool_citations + ["and MultiQC (Ewels et al. 2016)"]

    // The closing period is attached directly so no blank precedes it
    return sentence_parts.join(' ') + '.'
}

def toolBibliographyText() {
    // Builds the HTML bibliography (`<li>` items) for the MultiQC methods description.
    // One reference is included per enabled `params.build_*` switch (two for Kaiju, which
    // also cites SeqKit); the MultiQC reference is always included.
    // Returns the items joined by single spaces.
    def reference_items = [
        params.build_bracken ? '<li>Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: estimating species abundance in metagenomics data. PeerJ. Computer Science, 3(e104), e104. <a href="https://doi.org/10.7717/peerj-cs.104">10.7717/peerj-cs.104</a></li>' : "",
        params.build_centrifuge ? '<li>Kim, D., Song, L., Breitwieser, F. P., & Salzberg, S. L. (2016). Centrifuge: rapid and sensitive classification of metagenomic sequences. Genome Research, 26(12), 1721–1729.  <a href="https://doi.org/10.1101/gr.210641.116">10.1101/gr.210641.116</a></li>' : "",
        params.build_ganon ? '<li>Piro, V. C., Dadi, T. H., Seiler, E., Reinert, K., & Renard, B. Y. (2020). Ganon: Precise metagenomics classification against large and up-to-date sets of reference sequences. Bioinformatics (Oxford, England), 36(Suppl_1), i12–i20. <a href="https://doi.org/10.1093/bioinformatics/btaa458">10.1093/bioinformatics/btaa458</a></li>' : "",
        params.build_diamond ? '<li>Buchfink, B., Xie, C., & Huson, D. H. (2015). Fast and sensitive protein alignment using DIAMOND. Nature Methods, 12(1), 59–60. <a href="https://doi.org/10.1038/nmeth.3176">10.1038/nmeth.3176</a></li>' : "",
        params.build_kaiju ? '<li>Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. iMeta, e191. <a href="https://doi.org/10.1002/imt2.191">https://doi.org/10.1002/imt2.191</a></li>' : "",
        params.build_kaiju ? '<li>Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. <a href="https://doi.org/10.1038/ncomms11257">10.1038/ncomms11257</a></li>' : "",
        params.build_kmcp ? '<li>Shen, W., Xiang, H., Huang, T., Tang, H., Peng, M., Cai, D., Hu, P., & Ren, H. (2023). KMCP: accurate metagenomic profiling of both prokaryotic and viral populations by pseudo-mapping. Bioinformatics (Oxford, England), 39(1). <a href="https://doi.org/10.1093/bioinformatics/btac845">10.1093/bioinformatics/btac845</a></li>' : "",
        params.build_kraken2 ? '<li>Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257.  <a href="https://doi.org/10.1186/s13059-019-1891-0">10.1186/s13059-019-1891-0</a></li>' : "",
        params.build_krakenuniq ? '<li>Breitwieser, F. P., Baker, D. N., & Salzberg, S. L. (2018). KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology, 19(1), 198.  <a href="https://doi.org/10.1186/s13059-018-1568-0">10.1186/s13059-018-1568-0</a></li>' : "",
        params.build_malt ? '<li>Vågene, Å. J., Herbig, A., Campana, M. G., Robles García, N. M., Warinner, C., Sabin, S., Spyrou, M. A., Andrades Valtueña, A., Huson, D., Tuross, N., Bos, K. I., & Krause, J. (2018). Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature Ecology & Evolution, 2(3), 520–528.  <a href="https://doi.org/10.1038/s41559-017-0446-6">10.1038/s41559-017-0446-6</a></li>' : "",
        // The anchor is closed before the list item so the generated HTML is well-formed
        '<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: <a href="https://doi.org/10.1093/bioinformatics/btw354">10.1093/bioinformatics/btw354</a></li>',
    ]

    // References of tools that were not requested are dropped so that joining does not
    // leave runs of blanks between the remaining items
    return reference_items.findAll { item -> item }.join(' ')
}

def methodsDescriptionText(mqc_methods_yaml) {
    // Renders the MultiQC methods-description template.
    // `mqc_methods_yaml` must expose a `.text` property holding the template; the template
    // is filled in with the workflow metadata, the pipeline DOI text and the tool
    // citations / bibliography, and the rendered result is returned as a String.

    // Expose the workflow metadata as a named map so the template can use the familiar
    // NXF ${workflow} variable syntax
    def meta = [:]
    meta.workflow = workflow.toMap()
    meta["manifest_map"] = workflow.manifest.toMap()

    // Pipeline DOI
    if (meta.manifest_map.doi) {
        // manifest.doi is a comma-separated string rather than a proper list, so it is
        // tokenised here. The `https://doi.org/` prefix and any blanks are removed to
        // handle both bare DOIs and DOI resolver URLs.
        // collect/join yields an empty string (instead of a failing substring call)
        // when tokenising produces no entries.
        meta["doi_text"] = meta.manifest_map.doi
            .tokenize(",")
            .collect { doi_ref ->
                def bare_doi = doi_ref.replace("https://doi.org/", "").replace(" ", "")
                "(doi: <a href=\'https://doi.org/${bare_doi}\'>${bare_doi}</a>)"
            }
            .join(", ")
    }
    else {
        meta["doi_text"] = ""
    }
    meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"

    // Tool references: tidy the punctuation of the citation sentence
    meta["tool_citations"] = toolCitationText()
        .replaceAll(' {2,}', ' ')     // collapse runs of blanks
        .replaceAll(', \\.', '.')     // dangling comma before the final period
        .replaceAll('\\. \\.', '.')   // doubled period (both dots escaped so no other character is consumed)
        .replaceAll(' \\.$', '.')     // blank before the final period
    meta["tool_bibliography"] = toolBibliographyText()

    def methods_text = mqc_methods_yaml.text

    def engine = new groovy.text.SimpleTemplateEngine()
    def description_html = engine.createTemplate(methods_text).make(meta)

    return description_html.toString()
}
