import pandas as pd
from pathlib import Path
from humanfriendly import parse_size


# Load the workflow configuration. Snakemake exposes the parsed contents as the
# global `config` mapping, which the definitions below read from.
configfile: "config/config.yaml"


# --- Top-level directories -------------------------------------------------
# Each relative name is turned into an absolute path via `Path.resolve()`.
WORKFLOW = Path("workflow").resolve()
CONFIG = Path("config").resolve()
DATA = Path("data").resolve()
RESULTS = Path("results").resolve()
RESOURCES = Path("resources").resolve()
LOGS = Path("logs", "rules").resolve()

# --- Sub-directories of the workflow code ----------------------------------
RULES = WORKFLOW.joinpath("rules")
ENVS = WORKFLOW.joinpath("envs")
SCRIPTS = WORKFLOW.joinpath("scripts")

# --- Sub-directories of the results ----------------------------------------
BENCH = RESULTS.joinpath("benchmark")

# Container definitions come straight from the `containers` config entry.
CONTAINERS = config["containers"]

# NOTE(review): presumably the number of megabytes per gigabyte, for use in
# memory resource declarations -- confirm against the included rule files.
GB = 1000

# Table read from the CSV whose location is given by the `gramtools_lineages`
# entry of the workflow config.
gramtools_lineages = pd.read_csv(config["gramtools_lineages"])
# Values of the `strain` column in sorted order. `sorted` accepts any iterable
# and always returns a new list, so no intermediate `list(...)` copy is needed.
GRAMTOOLS_SAMPLES = sorted(gramtools_lineages["strain"])

# Every final output the workflow is asked to produce; filled in below and
# handed to `rule all` as its input.
target_files = set()

kraken_libs = ["bacteria", "archaea", "viral", "human", "plasmid"]

# One set of summary tables is requested per sequencing technology.
for tech in ("illumina", "ont"):
    # Outputs that depend only on the technology.
    target_files.update(
        RESULTS / relative_path
        for relative_path in (
            f"dehumanise/summary.{tech}.csv",
            f"real/dehumanise/summary.{tech}.csv",
            f"real/classify/summary.mtb.{tech}.csv",
            f"real/coverage/relative_change/{tech}.csv",
            f"coverage/relative_change/{tech}.csv",
        )
    )
    # Classification summaries, one per classification category.
    for c in ("genus", "species", "mtb", "mtbc"):
        target_files.add(RESULTS / f"classify/summary.{c}.{tech}.csv")


# `all` is declared a local rule: Snakemake runs local rules on the host node
# instead of submitting them as cluster/cloud jobs.
localrules:
    all,


# Aggregation target: requests every path collected in `target_files`.
# It has no outputs or commands of its own.
rule all:
    input:
        target_files,


# Pull in the rule modules from the workflow's `rules` directory. Snakemake
# processes the includes in the order listed here.
include: RULES / "common.smk"
include: RULES / "db.smk"
include: RULES / "simulate.smk"
include: RULES / "dehumanise.smk"
include: RULES / "classify.smk"
include: RULES / "assess.smk"
include: RULES / "real.smk"
include: RULES / "coverage.smk"
