#!/usr/bin/env nextflow

//
// Demultiplex Illumina BCL data using bcl-convert
//

include { BCLCONVERT } from "../../../modules/nf-core/bclconvert/main"
include { BCL2FASTQ  } from "../../../modules/nf-core/bcl2fastq/main"

workflow DEMUX_ILLUMINA {
    take:
        ch_flowcell // [[id:"", lane:""],samplesheet.csv, path/to/bcl/files]

    main:
        ch_versions = Channel.empty()
        ch_fastq    = Channel.empty()
        ch_reports  = Channel.empty()
        ch_stats    = Channel.empty()
        ch_interop  = Channel.empty()

        // MODULE: bclconvert
        // Demultiplex the bcl files
        BCLCONVERT( ch_flowcell )
        ch_fastq    = ch_fastq.mix(BCLCONVERT.out.fastq)
        ch_interop  = ch_interop.mix(BCLCONVERT.out.interop)
        ch_reports  = ch_reports.mix(BCLCONVERT.out.reports)
        ch_versions = ch_versions.mix(BCLCONVERT.out.versions)

        // MODULE: bcl2fastq
        // Demultiplex the bcl files
        BCL2FASTQ( ch_flowcell )
        ch_fastq    = ch_fastq.mix(BCL2FASTQ.out.fastq)
        ch_interop  = ch_interop.mix(BCL2FASTQ.out.interop)
        ch_reports  = ch_reports.mix(BCL2FASTQ.out.reports)
        ch_stats    = ch_stats.mix(BCL2FASTQ.out.stats)
        ch_versions = ch_versions.mix(BCL2FASTQ.out.versions)

        // Generate meta for each fastq
        ch_fastq_with_meta = generate_fastq_meta(ch_fastq)

    emit:
        fastq    = ch_fastq_with_meta
        reports  = ch_reports
        stats    = ch_stats
        interop  = ch_interop
        versions = ch_versions
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Add meta values to fastq channel
def generate_fastq_meta(ch_reads) {
    // Create a tuple with the meta.id and the fastq
    ch_reads.transpose().map{
        fc_meta, fastq ->
        def meta = [
            "id": fastq.getSimpleName().toString() - ~/_R[0-9]_001.*$/,
            "samplename": fastq.getSimpleName().toString() - ~/_S[0-9]+.*$/,
            "readgroup": [:],
            "fcid": fc_meta.id,
            "lane": fc_meta.lane
        ]
        meta.readgroup = readgroup_from_fastq(fastq)
        meta.readgroup.SM = meta.samplename

        return [ meta , fastq ]
    }
    // Group by meta.id for PE samples
    .groupTuple(by: [0])
    // Add meta.single_end
    .map {
        meta, fastq ->
        if (fastq.size() == 1){
            meta.single_end = true
        } else {
            meta.single_end = false
        }
        return [ meta, fastq.flatten() ]
    }
}

// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040
def readgroup_from_fastq(path) {
    // expected format:
    // xx:yy:FLOWCELLID:LANE:... (seven fields)
    // or
    // FLOWCELLID:LANE:xx:... (five fields)
    def line

    path.withInputStream {
        InputStream gzipStream = new java.util.zip.GZIPInputStream(it)
        Reader decoder = new InputStreamReader(gzipStream, 'ASCII')
        BufferedReader buffered = new BufferedReader(decoder)
        line = buffered.readLine()
    }
    assert line.startsWith('@')
    line = line.substring(1)
    def fields = line.split(':')
    def rg = [:]

    if (fields.size() >= 7) {
        // CASAVA 1.8+ format, from  https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm
        // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>:<UMI> <read>:<is filtered>:<control number>:<index>"
        sequencer_serial = fields[0]
        run_nubmer       = fields[1]
        fcid             = fields[2]
        lane             = fields[3]
        index            = fields[-1] =~ /[GATC+-]/ ? fields[-1] : ""

        rg.ID = [fcid,lane].join(".")
        rg.PU = [fcid, lane, index].findAll().join(".")
        rg.PL = "ILLUMINA"
    } else if (fields.size() == 5) {
        fcid = fields[0]

        rg.ID = fcid
    }
    return rg
}
