#!/usr/bin/env python3

# Base directory of the project, read from the workflow config key "project".
# The genomad, eggNOG and CAT rules below place their databases under
# <working_dir>/Database.
working_dir = config["project"]

rule genomad_database:
    """
    Download the geNomad database into <working_dir>/Database.

    The declared output is the `genomad_db/genomad_db` path that is expected
    to exist below the download directory once `genomad download-database`
    has finished.
    """
    conda:
        os.path.join(CONDAENV,"genomad.yaml")
    params:
        # Parent directory handed to geNomad as the download destination.
        database_dir = os.path.join(working_dir, "Database")
    output:
        os.path.join(working_dir, "Database/genomad_db/genomad_db")
    shell:
        # Create the destination first, then download only if that succeeded.
        "mkdir -p {params.database_dir} && genomad download-database {params.database_dir} "

rule eggNOGG_database:
    """
    Download the eggNOG reference data into <working_dir>/Database/eggNOG/.

    The whole directory is the rule's output; it is created up front and then
    passed to `download_eggnog_data.py` as its `--data_dir`.
    """
    conda:
        os.path.join(CONDAENV,"eggnog.yaml")
    output:
        directory(os.path.join(working_dir, "Database/eggNOG/"))
    shell:
        # `-y` is passed so the downloader runs without interactive prompts
        # (presumably auto-confirming every download; verify against the
        # eggnog-mapper version pinned in eggnog.yaml).
        "mkdir -p {output} && download_eggnog_data.py  --data_dir {output} -y "

rule GTDBTK_database:
    """
    Download and unpack the GTDB-Tk release 220 reference package.

    Outputs:
      - Database/release220/      directory expected after extracting the archive
      - Database/gtdbtk.downloaded  sentinel file touched after a successful run

    Steps: create the database directory, fetch the tarball into it, extract
    it in place, delete the tarball, then touch the sentinel.

    NOTE(review): unlike the sibling database rules, these paths are relative
    to the Snakemake working directory rather than joined onto `working_dir`.
    Confirm whether that is intentional before changing it, since other rules
    may reference these exact paths.
    """
    output:
        directory("Database/release220/"),
        "Database/gtdbtk.downloaded"
    params:
        db_dir = "Database",
    shell:
        # `wget -c` resumes or reuses an archive left behind by an interrupted
        # run. Without it wget would save the new download as `*.tar.gz.1`
        # and tar would then extract the stale, possibly truncated, original.
        "mkdir -p {params.db_dir} && "
        "wget -c https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz -P {params.db_dir} && "
        "tar -C {params.db_dir} -xvzf {params.db_dir}/gtdbtk_r220_data.tar.gz && "
        "rm {params.db_dir}/gtdbtk_r220_data.tar.gz && touch {output[1]}"

rule cat_database:
    """
    Build the CAT/BAT database and taxonomy folders with `CAT prepare --fresh`.

    Outputs (both are directories under <working_dir>/Database):
      - database: catbat_db, passed to CAT as `-d`
      - taxo:     catbat_taxo, passed to CAT as `-t`

    The rule's `threads` value (10) is forwarded to CAT via `-n`.
    If both output directories already exist the build is skipped.
    """
    output:
        database = directory(os.path.join(working_dir, "Database/catbat_db")),
        taxo = directory(os.path.join(working_dir, "Database/catbat_taxo")),
    threads:
        10
    conda:
        os.path.join(CONDAENV,"catbat.yaml")
    # Both outputs are directories, so the existence guard must use `-d`;
    # the previous `-f` test is never true for a directory, which made the
    # "database found" branch unreachable. Both directories are required so
    # the rule cannot skip the build while one declared output is missing.
    shell:
        "if [ -d {output.database} ] && [ -d {output.taxo} ]; then "
        "echo CAT database found ; "
        "else "
        "CAT prepare --fresh "
        "-n {threads} "
        "-d {output.database} "
        "-t {output.taxo} ; "
        "fi "
