#!/usr/bin/env cwl-runner

cwlVersion: v1.2
class: Workflow

# Optional CWL features this workflow relies on (map form, no per-feature options).
requirements:
  # `valueFrom` on step inputs (workflow_merge_reads/filename, emu/mapping_type).
  StepInputExpressionRequirement: {}
  # JavaScript in `when` conditions and `${...}` expression bodies.
  InlineJavascriptRequirement: {}
  # Several `source` entries on one step input (emu/reads).
  MultipleInputFeatureRequirement: {}
  ScatterFeatureRequirement: {}
  # Steps whose `run` target is another workflow file (workflow_*.cwl).
  SubworkflowFeatureRequirement: {}

# Human-readable title and description shown by CWL viewers and documentation tools.
label: Longread amplicon classification workflow
doc: |
  Workflow for quality assessment and taxonomic classification of amplicon long read sequences.
  In addition files are exported to their respective subfolders for easier data management in a later stage.

  Steps:
    - NanoPlot read quality control
    - Emu
inputs:
  # --- Core inputs: dataset identity, reads, reference and runtime settings ---
  identifier:
    type: string
    label: identifier used
    doc: Identifier for this dataset used in this workflow
  reads:
    type: File[]
    label: Read file
    doc: Read file in FASTA or FASTQ format (can be gz)
  reference_db:
    type: Directory
    label: Reference database
    doc: Reference database used in FASTA format
  readtype:
    # Anonymous enum; the emu step derives its mapping_type from this value.
    type:
      - type: enum
        symbols:
          - nanopore
          - pacbio
    label: Read type
    doc: Type of read nanopore or pacbio. default nanopore
    default: nanopore

  fastq_rich:
    # NOTE(review): no step in this file references fastq_rich — confirm whether
    # it is meant to be forwarded to the quality workflow.
    type: boolean
    label: Fastq rich (ONT)
    doc: |
      Input fastq is generated by albacore, MinKNOW or guppy with additional information concerning channel and time.
      Used to create more informative quality plots (default false)
    default: false

  threads:
    type: int?
    label: Number of threads
    doc: Number of threads to use for computational processes
    default: 4

  # Read filtering parameters
  # All of these are forwarded unchanged to the workflow_longread_quality step.
  skip_read_filter:
    # Also drives the `when` conditions that choose between the merge step and
    # the quality workflow.
    type: boolean
    label: Skip quality filtering
    doc: "Skip quality reporting and filtering. (Default false)"
    default: false

  disable_quality_filtering:
    type: boolean?
    label: Disable_quality_filtering
    doc: |
      Quality filtering is enabled by default. If this option is specified, quality filtering is disabled.
      Quality plots will still be generated when skip_read_filter is false. (Default false)
  qualified_quality_phred:
    type: int?
    default: 9
    label: Qualified_quality_phred
    doc: "the quality value that a base is qualified. Default 9 means phred quality >=Q9 is qualified."
  mean_qual:
    type: int?
    label: Mean quality
    doc: "if one read's mean_qual quality score < mean_qual, then this read is discarded. (Default 10)"
    default: 10
  minimum_length:
    # NOTE(review): the doc advertises a default of 1200, but no `default` is
    # declared here, so null is passed on when the caller leaves it unset —
    # confirm where the 1200 is actually applied.
    type: int?
    label: Minimum length required
    doc: "Reads shorter will be discarded. (Default 1200)"
  length_limit:
    type: int?
    label: Maximum length limit
    doc: "Reads longer than length_limit will be discarded. (Default 1600)"
    default: 1600
  trim_front:
    type: int?
    label: Trim_front
    doc: "Trimming how many bases in front for read. (Default not set, 0)"
  trim_tail:
    type: int?
    label: trim_tail
    doc: "Trimming how many bases in tail for read. (Default not set, 0)"

  # Sliding-window trimming options; forwarded to the workflow_longread_quality step.
  cut_front:
    label: Cut front
    type: boolean?
    doc: "Move a sliding window from front (5') to tail, drop the bases in the window if its mean quality < threshold, stop otherwise. Default false"
  cut_tail:
    label: Cut tail
    type: boolean?
    doc: "Move a sliding window from tail (3') to front, drop the bases in the window if its mean quality < threshold, stop otherwise Default false."
  cut_window_size:
    label: Cut window size
    type: int?
    default: 4
    doc: "The window size option shared by cut_front, cut_tail or cut_sliding. Range: 1~1000. Default 4"
  cut_mean_quality:
    # No `default` declared here; null is passed on when unset.
    label: Cut mean quality
    type: int?
    doc: "The mean quality requirement option shared by cut_front, cut_tail or cut_sliding. Range: 1~36. Default 20"

  # Adapter handling options; forwarded to the workflow_longread_quality step.
  start_adapter:
    label: start_adapter
    type: string?
    doc: "The adapter sequence at read start (5'). (Default auto-detect)"
  end_adapter:
    label: End adapter
    type: string?
    doc: "The adapter sequence at read end (3'). (Default auto-detect)"
  adapter_fasta:
    label: Adapter fasta
    type: File?
    doc: "Specify a FASTA file to trim both read ends by all the sequences in this FASTA file. (Default None)"
  disable_adapter_trimming:
    # Defaults to true here, i.e. adapter trimming is off unless the caller
    # explicitly passes false.
    label: Disable adapter trimming
    type: boolean
    default: true
    doc: "Adapter trimming is enabled by default. If this option is specified, adapter trimming is disabled. Default true"

  # Output handling
  output_filtered_reads:
    # Read by the `when` condition of step_output_filtered_reads.
    label: Output filtered reads
    type: boolean
    default: false
    doc: "Output filtered reads when filtering is applied. (Default false)"

  destination:
    # Not referenced by any step in this file.
    label: Output Destination
    type: string?
    doc: Optional Output destination used for cwl-prov reporting.


outputs:
  # Both quality outputs are optional because their source steps are
  # conditional (`when`) and yield null when skipped.
  quality_folder:
    label: NanoPlot
    doc: Folder with quality plots from Nanoplot
    type: Directory?
    outputSource: workflow_longread_quality/reports_folder
  filtered_reads:
    doc: Filtered reads output file
    type: File?
    outputSource: step_output_filtered_reads/reads_out

  # The emu step has no `when` condition, so its outputs are declared required.
  emu_abundance:
    label: Emu abundances
    type: File
    outputSource: emu/abundance
  emu_read_assignment_distributions:
    label: Emu read assignment distribution
    type: File
    outputSource: emu/read_assignment_distributions
  emu_unclassified:
    label: Emu unclassified
    type: File
    outputSource: emu/unclassified

steps:
#############################################
#### Merge reads
  workflow_merge_reads:
    # Runs only when filtering is skipped; its merged_reads output is the
    # fallback source for the emu step's `reads` input.
    run: workflow_merge_se_reads.cwl
    when: $(inputs.skip_read_filter)
    label: Merge paired reads
    doc: Creates a single file object. Also merges reads if multiple files are given.
    in:
      reads: reads
      # Referenced by the `when` expression above.
      skip_read_filter: skip_read_filter
      # Output file name: the dataset identifier with a "_merged" suffix.
      filename:
        valueFrom: $(self)_merged
        source: identifier
    out:
      - merged_reads

#############################################
#### Quality workflow Oxford Nanopore
  workflow_longread_quality:
    # Runs unless filtering is skipped; complementary to workflow_merge_reads,
    # which runs only when skip_read_filter is true.
    run: workflow_longread_quality.cwl
    when: $(!inputs.skip_read_filter)
    label: Oxford Nanopore quality workflow
    doc: Quality, filtering and taxonomic classification workflow for Oxford Nanopore reads
    in:
      # Dataset and runtime
      identifier: identifier
      longreads: reads
      readtype: readtype
      threads: threads

      # Switches (skip_read_filter is also referenced by `when` above)
      skip_read_filter: skip_read_filter
      disable_quality_filtering: disable_quality_filtering

      # Length and quality thresholds
      minimum_length: minimum_length
      length_limit: length_limit
      qualified_quality_phred: qualified_quality_phred
      mean_qual: mean_qual

      # Fixed-length trimming
      trim_front: trim_front
      trim_tail: trim_tail

      # Sliding-window trimming
      cut_front: cut_front
      cut_tail: cut_tail
      cut_window_size: cut_window_size
      cut_mean_quality: cut_mean_quality

      # Adapter handling
      start_adapter: start_adapter
      end_adapter: end_adapter
      adapter_fasta: adapter_fasta
      disable_adapter_trimming: disable_adapter_trimming
    out:
      - filtered_reads
      - reports_folder

#############################################
#### Emu abundance
  emu:
    # Always runs (no `when`). Takes whichever upstream branch produced reads.
    label: Emu abundance
    doc: Emu abundance; species-level taxonomic abundance for full-length 16S read
    run: ../tools/emu/emu_abundance.cwl
    in:
      # Exactly one of the two sources is non-null: the quality workflow runs
      # when skip_read_filter is false, the merge workflow when it is true.
      reads:
        source:
          - workflow_longread_quality/filtered_reads
          - workflow_merge_reads/merged_reads
        pickValue: first_non_null

      # Translate the workflow-level read type into the mapping preset name.
      # `self` is the value of `source` (readtype). `inputs` only exposes this
      # step's own input ids, so `inputs.readtype` is undefined here and would
      # make the expression return null for every read type.
      mapping_type:
        source: readtype
        valueFrom: |
          ${
            if (self == "nanopore") {
              return "map-ont";
            }
            if (self == "pacbio") {
              return "map-pb";
            }
            return null;
          }

      identifier: identifier
      threads: threads

      reference_db: reference_db

      # Fixed options: always request the extra outputs exposed by this workflow.
      keep_read_assignments:
        default: true
      keep_counts:
        default: true
      output_unclassified:
        default: true

    out:
      - abundance
      - read_assignment_distributions
      - unclassified

#############################################
#### Output reads 
  step_output_filtered_reads:
    # Pass-through step: exposes the filtered reads as a workflow output only
    # when filtering ran and the caller asked for the reads.
    label: Output reads
    doc: Step needed to output filtered reads.
    when: $(!inputs.skip_read_filter && inputs.output_filtered_reads)
    in:
      reads_in: workflow_longread_quality/filtered_reads

      # Referenced by the `when` expression only.
      skip_read_filter: skip_read_filter
      output_filtered_reads: output_filtered_reads
    out:
      - reads_out
    run:
      class: ExpressionTool
      requirements:
        - class: InlineJavascriptRequirement
      inputs:
        reads_in:
          type: File?
      outputs:
        reads_out:
          type: File
      # Returns the input file object unchanged.
      expression: |
        ${ return {'reads_out': inputs.reads_in}; }

#############################

# Provenance metadata; the `s:` prefix resolves through $namespaces below.
s:author:
  - class: s:Person
    s:name: Bart Nijsse
    s:email: mailto:bart.nijsse@wur.nl
    s:identifier: https://orcid.org/0000-0001-9524-5964
  - class: s:Person
    s:name: Jasper Koehorst
    s:email: mailto:jasper.koehorst@wur.nl
    s:identifier: https://orcid.org/0000-0001-8172-8981
  - class: s:Person
    s:name: Changlin Ke
    s:email: mailto:changlin.ke@wur.nl
    s:identifier: https://orcid.org/0009-0001-1350-5644

s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
# NOTE(review): day component "00" is not a valid calendar day — confirm the intended date.
s:dateCreated: "2024-02-00"
s:dateModified: "2025-07-25"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"

$namespaces:
  s: https://schema.org/