#!/usr/bin/env cwltool
# Document header: a CWL v1.2 workflow.
cwlVersion: v1.2
class: Workflow

# Optional CWL features the runner must support to execute this document.
requirements:
  # JavaScript expressions such as the $("...") folder names on the
  # files_to_folder steps.
  InlineJavascriptRequirement: {}
  # Several sources feeding a single step input (see `linkMerge` below).
  MultipleInputFeatureRequirement: {}
  # Declared, although no step visible in this file uses `scatter`.
  ScatterFeatureRequirement: {}
  # `valueFrom` on step inputs.
  StepInputExpressionRequirement: {}
  # Steps whose `run` target is itself a workflow
  # (presumably workflow_nanopore_noguppy.cwl -- confirm).
  SubworkflowFeatureRequirement: {}

label: nanopore workflow
doc: |
    Workflow for sequencing with ONT nanopore, from basecalling to assembly quality.
    Steps:
      - Guppy (basecalling of raw reads)
      - MinIONQC (quality check)
      - FASTQ merging from multi into one file
      - Kraken2 (taxonomic classification)
      - Krona (classification visualization)
      - Flye (de novo assembly)
      - Medaka (assembly polishing)
      - QUAST (assembly quality reports)

      The dependencies are available either from https://unlock-icat.irods.surfsara.nl (anonymous access) or
      through the conda / pip environments set up by https://unlock-icat.irods.surfsara.nl/infrastructure/binaries/scripts/setup.sh
      
# Workflow outputs: one directory per stage. The Guppy and MinIONQC folders are
# assembled by the files_to_folder steps in this file; the remaining wired
# outputs are forwarded from the workflow_nanopore_noguppy step.
outputs:
  guppy_output:
    label: Guppy for CPU
    doc: Basecalling of raw reads with Guppy
    type: Directory
    outputSource: guppy_files_to_folder/results
  minionqc_output:
    label: MinION-Quality-Check
    doc: Quality check of basecalling with MinIONQC
    type: Directory
    outputSource: minionqc_files_to_folder/results
  merge_output:
    label: FASTQ files merged
    doc: Concatenation of FASTQ files from Guppy
    # This output has no outputSource, and no step in this file exposes a
    # merged-FASTQ directory, so it can never carry a value. It is declared
    # optional so that the declared type matches what the workflow yields.
    # TODO(review): add an outputSource once a producing step output exists,
    # then switch the type back to `Directory`.
    type: Directory?
  kraken2_output:
    label: Kraken2 reports
    doc: Kraken2 taxonomic classification reports
    type: Directory
    outputSource: workflow_nanopore_noguppy/kraken2_output
  krona_output:
    label: Krona taxonomy visualization
    doc: Visual presentation in HTML of Kraken2 results
    type: Directory
    outputSource: workflow_nanopore_noguppy/krona_output
  flye_output:
    label: Flye de novo assembler for single-molecule reads
    doc: Flye output directory
    type: Directory
    outputSource: workflow_nanopore_noguppy/flye_output
  medaka_output:
    label: Medaka polisher
    doc: Polishing of Flye assembly
    type: Directory
    outputSource: workflow_nanopore_noguppy/medaka_output
  quast_output:
    label: QUAlity assessment
    doc: QUAST analysis output
    type: Directory
    outputSource: workflow_nanopore_noguppy/quast_output

# Workflow inputs, grouped by the stage that consumes them.
inputs:
  # General
  identifier:
    type: string
    doc: Identifier for this dataset used in this workflow
    label: identifier used
  threads:
    # Optional; falls back to 2 when the job file does not set it.
    type: int?
    doc: number of threads to use for computational processes
    label: number of threads
    default: 2
  # Guppy
  fast5_files:
    # The type is an array of files, not a Directory, so the description
    # speaks of files rather than a folder.
    type: File[]
    doc: Nanopore raw read files to basecall
    label: nanopore reads
  configuration_command:
    # Forwarded as-is to the `configuration_command` input of the basecalling
    # step; its exact format is defined by ../guppy/guppy.cwl.
    type: string
    doc: Guppy configuration passed to the basecalling step
    label: Guppy configuration
  # Kraken2
  kraken_database:
    type: Directory
    doc: database location of kraken2
    label: Kraken2 database
  # Medaka
  basecall_model:
    type: string
    doc: basecalling model used with Guppy
    label: basecalling model

steps:
#############################################
#### basecalling with Guppy for CPU
  workflow_basecalling:
    # Runs ../guppy/guppy.cwl on the raw reads. Within this file, `fastq_reads`
    # feeds the downstream nanopore step, `summary` feeds MinIONQC, and
    # summary/telemetry/guppy_log are collected into the Guppy output folder.
    run: ../guppy/guppy.cwl
    label: Guppy-CPU basecalling
    doc: Basecalling with Guppy for CPU of raw reads to FASTQ reads with summary
    in:
      threads: threads
      configuration_command: configuration_command
      fast5_files: fast5_files
    out:
      - reads_directory
      - fastq_reads
      - summary
      - telemetry
      - guppy_log
#############################################
#### Nanopore classification and assembly workflow
  workflow_nanopore_noguppy:
    # Hands the basecalled FASTQ reads to workflow_nanopore_noguppy.cwl and
    # exposes its five result directories, each forwarded to a workflow output.
    run: workflow_nanopore_noguppy.cwl
    label: Nanopore workflow
    doc: The rest of the nanopore workflow without basecalling
    in:
      nanopore_fastq_reads: workflow_basecalling/fastq_reads
      basecall_model: basecall_model
      kraken_database: kraken_database
      identifier: identifier
      threads: threads
    out:
      - kraken2_output
      - krona_output
      - flye_output
      - medaka_output
      - quast_output
#############################################
#### Move to folder if not part of a workflow
  guppy_files_to_folder:
    # Gathers the Guppy summary, telemetry and log outputs and passes them,
    # together with a fixed folder name, to ../expressions/files_to_folder.cwl.
    run: ../expressions/files_to_folder.cwl
    label: Guppy output folder
    doc: Preparation of Guppy output files to a specific output folder
    in:
      destination:
        # Constant folder name, supplied as an expression.
        valueFrom: $("1_Guppy_basecalling")
      files:
        # The three sources are merged into one flat list.
        linkMerge: merge_flattened
        source:
          - workflow_basecalling/summary
          - workflow_basecalling/telemetry
          - workflow_basecalling/guppy_log
    out: [results]
############################################# LOCAL INSTALL, BUT USE CONDA IF NO CONFLICTS
#### quality check of basecalling with MinIONQC
  workflow_minionqc:
    # Feeds the Guppy `summary` output to ../minionqc/minionqc.cwl.
    run: ../minionqc/minionqc.cwl
    label: MinIONQC quality check
    doc: Plots and statistics generated with MinIONQC from basecalling with Guppy
    in:
      threads: threads
      seq_summary: workflow_basecalling/summary
    out:
      - qc_files
#############################################
#### Move to folder if not part of a workflow
  minionqc_files_to_folder:
    # Passes the MinIONQC result files, together with a fixed folder name, to
    # ../expressions/files_to_folder.cwl.
    run: ../expressions/files_to_folder.cwl
    label: MinIONQC output folder
    doc: Preparation of MinIONQC output files to a specific output folder
    in:
      destination:
        # Constant folder name, supplied as an expression.
        valueFrom: $("2_MinIONQC_qualitycheck")
      files:
        linkMerge: merge_flattened
        source:
          - workflow_minionqc/qc_files
    out: [results]
#############################################

# schema.org metadata (the `s:` prefix is bound under $namespaces).
s:author:
  - class: s:Person
    s:name: Germán Royval
    s:email: mailto:german.royvalgarcia@wur.nl
    s:identifier: https://orcid.org/0000-0002-5516-8391
  - class: s:Person
    s:name: Jasper Koehorst
    s:email: mailto:jasper.koehorst@wur.nl
    s:identifier: https://orcid.org/0000-0001-8172-8981
  - class: s:Person
    s:name: Bart Nijsse
    s:email: mailto:bart.nijsse@wur.nl
    s:identifier: https://orcid.org/0000-0001-9524-5964

s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
s:license: https://spdx.org/licenses/Apache-2.0
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
# Quoted so the date stays a string rather than being parsed as a timestamp.
s:dateCreated: "2021-12-10"

# Prefix bindings for the `s:` and `edam:` terms.
$namespaces:
  edam: http://edamontology.org/
  s: https://schema.org/

# Ontology documents loaded alongside this workflow.
$schemas:
  - http://edamontology.org/EDAM_1.18.owl