cwlVersion: v1.2
class: Workflow
doc: >-
  The workflow starts with selecting atrial fibrillation as the disease search
  term and Ibrutinib as the drug search term. Gene sets with
  set labels containing atrial fibrillation were queried from Enrichr[1].
  Identified matching terms from the MGI Mammalian Phenotype Level 4 2021[2]
  library were assembled into a collection of gene sets. A GMT was extracted
  from the Enrichr results for MGI_Mammalian_Phenotype_Level_4_2021. A consensus
  gene set was created by only retaining genes that appear in at least two sets.
  Identified matching terms from the GWAS Catalog 2019[4] library were assembled
  into a collection of gene sets. A GMT was extracted from the Enrichr results
  for GWAS_Catalog_2019. A consensus gene set was created by only retaining
  genes that appear in at least two sets. The gene sets collected were combined
  into one gene set library. Gene sets with set labels containing Ibrutinib were
  queried from Enrichr[1]. Identified matching terms from the LINCS L1000 Chem
  Pert Consensus Sigs[5] library were assembled into a collection of gene sets.
  A GMT was extracted from the Enrichr results for
  LINCS_L1000_Chem_Pert_Consensus_Sigs. Multiple GMTs were combined into one
  GMT. The collection of gene sets was then visualized with a Supervenn
  diagram.

  1. Xie, Z. et al. Gene Set Knowledge Discovery with Enrichr. Current Protocols
  vol. 1 (2021). doi:10.1002/cpz1.90

  2. Blake, J. A. et al. Mouse Genome Database (MGD): Knowledgebase for
  mouse–human comparative biology. Nucleic Acids Research vol. 49 D981–D987
  (2020). doi:10.1093/nar/gkaa1083

  4. Sollis, E. et al. The NHGRI-EBI GWAS Catalog: knowledgebase and deposition
  resource. Nucleic Acids Research vol. 51 D977–D985 (2022).
  doi:10.1093/nar/gkac1010

  5. Evangelista, J. E. et al. SigCom LINCS: data and metadata search engine for
  a million gene expression signatures. Nucleic Acids Research vol. 50 W697–W709
  (2022). doi:10.1093/nar/gkac328
requirements: {}
# Workflow-level inputs. Each entry is a File that is wired into the `data`
# port of the step with the same number (step-1, step-2 and step-10).
inputs:
  step-1-data:
    type: File
    label: Disease Input
    doc: Start with a Disease
  step-2-data:
    type: File
    label: Drug Input
    doc: Start with a Drug
  step-10-data:
    type: File
    label: Assemble GMT from Gene Sets
    doc: Group multiple independently generated gene sets into a single GMT
# Workflow-level outputs. Every step exposes its single `output` port as a
# File named `step-N-output`, so intermediate results are all retained.
outputs:
  # Entry points: the disease and drug terms.
  step-1-output:
    type: File
    outputSource: step-1/output
    label: Disease
    doc: Disease Term
  step-2-output:
    type: File
    outputSource: step-2/output
    label: Drug
    doc: Drug Term
  # Disease branch (derived from step-1).
  step-3-output:
    type: File
    outputSource: step-3/output
    label: Enrichr Term Search Results
    doc: Results of an Enrichr Term Search
  step-4-output:
    type: File
    outputSource: step-4/output
    label: Enrichr Phenotype Set
    doc: Set of Phenotypes
  step-5-output:
    type: File
    outputSource: step-5/output
    label: Gene Matrix Transpose
    doc: Terms mapped to genes
  step-6-output:
    type: File
    outputSource: step-6/output
    label: Gene Set
    doc: Set of Genes
  step-7-output:
    type: File
    outputSource: step-7/output
    label: Enrichr Phenotype Set
    doc: Set of Phenotypes
  step-8-output:
    type: File
    outputSource: step-8/output
    label: Gene Matrix Transpose
    doc: Terms mapped to genes
  step-9-output:
    type: File
    outputSource: step-9/output
    label: Gene Set
    doc: Set of Genes
  step-10-output:
    type: File
    outputSource: step-10/output
    label: Gene Matrix Transpose
    doc: Terms mapped to genes
  # Drug branch (derived from step-2).
  step-11-output:
    type: File
    outputSource: step-11/output
    label: Enrichr Term Search Results
    doc: Results of an Enrichr Term Search
  step-12-output:
    type: File
    outputSource: step-12/output
    label: Enrichr Drug Set
    doc: Set of Drugs
  step-13-output:
    type: File
    outputSource: step-13/output
    label: Gene Matrix Transpose
    doc: Terms mapped to genes
  # Combined result of both branches and its visualization.
  step-14-output:
    type: File
    outputSource: step-14/output
    label: Gene Matrix Transpose
    doc: Terms mapped to genes
  step-15-output:
    type: File
    outputSource: step-15/output
    label: Supervenn Visualization
    doc: A visualization for comparing sets
steps:
  # Passes the workflow input `step-1-data` through Input[Disease].cwl.
  # Its `output` is consumed by step-3 and exported as `step-1-output`.
  step-1:
    label: Disease Input
    doc: Start with a Disease
    run: Input[Disease].cwl
    in:
      data:
        source: step-1-data
      outputFilename:
        default: step-1-output.json
    out: [output]
  # Passes the workflow input `step-2-data` through Input[Drug].cwl.
  # Its `output` is consumed by step-11 and exported as `step-2-output`.
  step-2:
    label: Drug Input
    doc: Start with a Drug
    run: Input[Drug].cwl
    in:
      data:
        source: step-2-data
      outputFilename:
        default: step-2-output.json
    out: [output]
  # Runs the disease term search on the output of step-1.
  # The search results fan out to step-4 and step-7.
  step-3:
    label: Extract Gene Sets Containing the Disease in the Set Label
    doc: Find Disease Terms in Enrichr Libraries
    run: EnrichrTermSearch[Disease].cwl
    in:
      inputs.term:
        source: step-1/output
        label: Disease
      outputFilename:
        default: step-3-output.json
    out: [output]
  # Takes the step-3 search results and runs the extraction tool for the
  # MGI_Mammalian_Phenotype_Level_4_2021 library; feeds step-5.
  step-4:
    label: Extract MGI Mammalian Phenotypes Associated with the Term Search
    doc: Extract Terms from the MGI Mammalian Phenotype Level 4 2021 Library
    run: ExtractEnrichrTermSearch[MGI_Mammalian_Phenotype_Level_4_2021].cwl
    in:
      inputs.searchResults:
        source: step-3/output
        label: Enrichr Term Search Results
      outputFilename:
        default: step-4-output.json
    out: [output]
  # Runs EnrichrSetTToGMT[Phenotype].cwl on the phenotype set from step-4;
  # the resulting GMT feeds step-6.
  step-5:
    label: Enrichr Phenotype Set as GMT
    doc: Load Enrichr set as GMT
    run: EnrichrSetTToGMT[Phenotype].cwl
    in:
      inputs.enrichrset:
        source: step-4/output
        label: Enrichr Phenotype Set
      outputFilename:
        default: step-5-output.json
    out: [output]
  # Runs GMTConsensus.cwl on the GMT from step-5. The resulting gene set
  # becomes the first gene set (`inputs.genesets:0`) of step-10.
  step-6:
    label: Compute Consensus Gene Set
    doc: Find genes which appear in more than one set
    run: GMTConsensus.cwl
    in:
      inputs.gmt:
        source: step-5/output
        label: Gene Matrix Transpose
      outputFilename:
        default: step-6-output.json
    out: [output]
  # Second consumer of the step-3 search results: runs the extraction tool
  # for the GWAS_Catalog_2019 library; feeds step-8.
  step-7:
    label: Extract GWAS Phenotypes Associated with the Term Search
    doc: Extract Terms from the GWAS Catalog 2019 Library
    run: ExtractEnrichrTermSearch[GWAS_Catalog_2019].cwl
    in:
      inputs.searchResults:
        source: step-3/output
        label: Enrichr Term Search Results
      outputFilename:
        default: step-7-output.json
    out: [output]
  # Runs EnrichrSetTToGMT[Phenotype].cwl on the phenotype set from step-7;
  # the resulting GMT feeds step-9.
  step-8:
    label: Enrichr Phenotype Set as GMT
    doc: Load Enrichr set as GMT
    run: EnrichrSetTToGMT[Phenotype].cwl
    in:
      inputs.enrichrset:
        source: step-7/output
        label: Enrichr Phenotype Set
      outputFilename:
        default: step-8-output.json
    out: [output]
  # Runs GMTConsensus.cwl on the GMT from step-8. The resulting gene set
  # becomes the second gene set (`inputs.genesets:1`) of step-10.
  step-9:
    label: Compute Consensus Gene Set
    doc: Find genes which appear in more than one set
    run: GMTConsensus.cwl
    in:
      inputs.gmt:
        source: step-8/output
        label: Gene Matrix Transpose
      outputFilename:
        default: step-9-output.json
    out: [output]
  # Runs GenesetsToGMT.cwl on the two consensus gene sets (step-6 and step-9)
  # together with the workflow input `step-10-data`. The resulting GMT is the
  # first GMT (`inputs.gmts:0`) of step-14.
  step-10:
    label: Assemble GMT from Gene Sets
    doc: Group multiple independently generated gene sets into a single GMT
    run: GenesetsToGMT.cwl
    in:
      data:
        source: step-10-data
      inputs.genesets:0:
        source: step-6/output
        label: Gene Set
      inputs.genesets:1:
        source: step-9/output
        label: Gene Set
      outputFilename:
        default: step-10-output.json
    out: [output]
  # Runs the drug term search on the output of step-2; the search results
  # feed step-12.
  step-11:
    label: Extract Gene Sets Containing the Drug in the Set Label
    doc: Find Drug Terms in Enrichr Libraries
    run: EnrichrTermSearch[Drug].cwl
    in:
      inputs.term:
        source: step-2/output
        label: Drug
      outputFilename:
        default: step-11-output.json
    out: [output]
  # Takes the step-11 search results and runs the extraction tool for the
  # LINCS_L1000_Chem_Pert_Consensus_Sigs library; feeds step-13.
  step-12:
    label: Extract L1000 Chem Pert Signatures Containing the Term Search
    doc: Extract Terms from the LINCS L1000 Chem Pert Consensus Sigs Library
    run: ExtractEnrichrTermSearch[LINCS_L1000_Chem_Pert_Consensus_Sigs].cwl
    in:
      inputs.searchResults:
        source: step-11/output
        label: Enrichr Term Search Results
      outputFilename:
        default: step-12-output.json
    out: [output]
  # Runs EnrichrSetTToGMT[Drug].cwl on the drug set from step-12. The
  # resulting GMT is the second GMT (`inputs.gmts:1`) of step-14.
  step-13:
    label: Enrichr Drug Set as GMT
    doc: Load Enrichr set as GMT
    run: EnrichrSetTToGMT[Drug].cwl
    in:
      inputs.enrichrset:
        source: step-12/output
        label: Enrichr Drug Set
      outputFilename:
        default: step-13-output.json
    out: [output]
  # Runs GMTConcatenate.cwl on the GMT from step-10 (disease branch) and the
  # GMT from step-13 (drug branch); the combined GMT feeds step-15.
  step-14:
    label: Concatenate GMTs
    doc: Join several GMTs into one
    run: GMTConcatenate.cwl
    in:
      inputs.gmts:0:
        source: step-10/output
        label: Gene Matrix Transpose
      inputs.gmts:1:
        source: step-13/output
        label: Gene Matrix Transpose
      outputFilename:
        default: step-14-output.json
    out: [output]
  # Final step: runs SupervennFromGMT.cwl on the combined GMT from step-14.
  # No other step consumes its output; it is exported as `step-15-output`.
  step-15:
    label: Compare sets with Supervenn
    doc: Interactively analyse overlap between sets
    run: SupervennFromGMT.cwl
    in:
      inputs.gmt:
        source: step-14/output
        label: Gene Matrix Transpose
      outputFilename:
        default: step-15-output.json
    out: [output]
