# Default target (first rule in the file): lists every final Auspice JSON.
# Other rules look these paths up by name through `rules.all.input.<name>`,
# so the keyword names are part of the contract; their order is not.
rule all:
    input:
        # Files written by `augur export v1`.
        auspice_v1_tree = "auspice/v1_zika_tree.json",
        auspice_v1_meta = "auspice/v1_zika_meta.json",
        auspice_v1_seq = "auspice/v1_zika_seq.json",
        # File written by `augur export v2`.
        auspice_v2 = "auspice/v2_zika.json",
        # Tip frequencies: the base file plus one copy per export flavour.
        auspice_tip_frequencies = "auspice/zika_tip-frequencies.json",
        auspice_v1_tip_frequencies = "auspice/v1_zika_tip-frequencies.json",
        auspice_v2_tip_frequencies = "auspice/v2_zika_tip-frequencies.json"
# Central registry of static input/config paths. This rule has no input,
# output or command; it exists only so the paths below can be referenced
# by name from the rest of the workflow.
rule files:
    params:
        # Raw data and reference.
        input_fasta = "data/zika.fasta",
        reference = "config/zika_outgroup.gb",
        # Filtering configuration.
        dropped_strains = "config/dropped_strains.txt",
        # Export/display configuration.
        colors = "config/colors.tsv",
        auspice_config_v1 = "config/auspice_config_v1.json",
        auspice_config_v2 = "config/auspice_config_v2.json"

# Short alias so rules can write `files.<name>` instead of
# `rules.files.params.<name>`.
files = rules.files.params

# Split the raw FASTA into a sequences file and a metadata table using
# `augur parse`.
#   input.sequences        raw FASTA (from the `files` registry)
#   output.sequences       FASTA written via --output-sequences
#   output.metadata        TSV written via --output-metadata
#   params.fasta_fields    space-separated names passed to --fields
#   params.prettify_fields space-separated names passed to --prettify-fields
rule parse:
    # Added for consistency with the other build rules, which all announce
    # themselves with a message.
    message: "Parsing fasta into sequences and metadata"
    input:
        sequences = files.input_fasta
    output:
        sequences = "results/sequences.fasta",
        metadata = "results/metadata.tsv"
    params:
        fasta_fields = "strain virus accession date region country division city db segment authors url title journal paper_url",
        prettify_fields = "region country division city"
    shell:
        """
        augur parse \
            --sequences {input.sequences} \
            --output-sequences {output.sequences} \
            --output-metadata {output.metadata} \
            --fields {params.fasta_fields} \
            --prettify-fields {params.prettify_fields}
        """

# Run `augur filter` on the parsed sequences and metadata, excluding the
# strains listed in the dropped-strains file, and write the retained
# sequences to results/filtered.fasta.
rule filter:
    message: "Filtering sequences"
    # Settings forwarded verbatim to --group-by, --sequences-per-group and
    # --min-date respectively.
    params:
        group_by = "country year month",
        sequences_per_group = 1,
        min_date = 2012
    input:
        sequences = rules.parse.output.sequences,
        metadata = rules.parse.output.metadata,
        exclude = files.dropped_strains
    output:
        sequences = "results/filtered.fasta"
    shell:
        """
        augur filter \
            --sequences {input.sequences} \
            --metadata {input.metadata} \
            --exclude {input.exclude} \
            --output {output.sequences} \
            --group-by {params.group_by} \
            --sequences-per-group {params.sequences_per_group} \
            --min-date {params.min_date}
        """

# Run `augur align` on the filtered sequences against the reference file,
# with --fill-gaps, producing results/aligned.fasta.
rule align:
    message: "Aligning sequences"
    output:
        alignment = "results/aligned.fasta"
    input:
        sequences = rules.filter.output.sequences,
        reference = files.reference
    shell:
        """
        augur align \
            --sequences {input.sequences} \
            --reference-sequence {input.reference} \
            --output {output.alignment} \
            --fill-gaps
        """

# Run `augur tree` on the alignment and write the resulting Newick file to
# results/tree_raw.nwk.
rule tree:
    message: "Building tree"
    # Tree builder name forwarded to --method.
    params:
        method = "iqtree"
    input:
        alignment = rules.align.output.alignment
    output:
        tree = "results/tree_raw.nwk"
    shell:
        """
        augur tree \
            --alignment {input.alignment} \
            --output {output.tree} \
            --method {params.method}
        """

# Run `augur refine --timetree` on the raw tree, using the alignment and
# metadata, and write the refined tree plus a node-data JSON.
#   input.tree / input.alignment / input.metadata   upstream rule outputs
#   output.tree        results/tree.nwk (via --output-tree)
#   output.node_data   results/branch_lengths.json (via --output-node-data)
#   params.*           forwarded to --coalescent, --date-inference and
#                      --clock-filter-iqd
rule refine:
    message: "Refining tree"
    input:
        tree = rules.tree.output.tree,
        # Reference the named output (as `rule tree` does) rather than the
        # whole output collection, so that adding another output to `align`
        # cannot splice extra paths into --alignment.
        alignment = rules.align.output.alignment,
        metadata = rules.parse.output.metadata
    output:
        tree = "results/tree.nwk",
        node_data = "results/branch_lengths.json"
    params:
        coalescent = "opt",
        date_inference = "marginal",
        clock_filter_iqd = 4
    shell:
        """
        augur refine \
            --tree {input.tree} \
            --alignment {input.alignment} \
            --metadata {input.metadata} \
            --output-tree {output.tree} \
            --output-node-data {output.node_data} \
            --timetree \
            --coalescent {params.coalescent} \
            --date-confidence \
            --date-inference {params.date_inference} \
            --clock-filter-iqd {params.clock_filter_iqd}
        """

# Run `augur ancestral` on the refined tree and the alignment, with
# --infer-ambiguous, and write the result to results/nt_muts.json.
#   params.inference   forwarded to --inference
rule ancestral:
    message: "Reconstructing ancestral sequences and mutations"
    input:
        tree = rules.refine.output.tree,
        # Reference the named output (as `rule tree` does) rather than the
        # whole output collection, so that adding another output to `align`
        # cannot splice extra paths into --alignment.
        alignment = rules.align.output.alignment
    output:
        node_data = "results/nt_muts.json"
    params:
        inference = "joint"
    shell:
        """
        augur ancestral \
            --tree {input.tree} \
            --alignment {input.alignment} \
            --infer-ambiguous \
            --output {output.node_data} \
            --inference {params.inference}
        """

# Run `augur translate` on the refined tree, the node data produced by
# `rule ancestral` and the reference file, writing results/aa_muts.json.
rule translate:
    message: "Translating amino acid sequences"
    input:
        tree = rules.refine.output.tree,
        node_data = rules.ancestral.output.node_data,
        reference = files.reference
    output:
        node_data = "results/aa_muts.json"
    shell:
        # The last argument carries no trailing backslash: a dangling line
        # continuation would silently absorb any line added after it.
        """
        augur translate \
            --tree {input.tree} \
            --ancestral-sequences {input.node_data} \
            --reference-sequence {input.reference} \
            --output {output.node_data}
        """

# Run `augur traits` with --confidence on the refined tree and the metadata,
# writing results/traits.json.
#   input.weights                    file passed to --weights
#   params.columns                   space-separated names passed to --columns
#   params.sampling_bias_correction  forwarded to --sampling-bias-correction
rule traits:
    message: "Inferring ancestral traits"
    input:
        tree = rules.refine.output.tree,
        metadata = rules.parse.output.metadata,
        # Declared as an input (not a param) because it is a file the command
        # reads: Snakemake then checks that it exists before running and
        # re-runs the rule when it changes.
        weights = "config/trait_weights.csv"
    output:
        node_data = "results/traits.json",
    params:
        columns = "country region",
        sampling_bias_correction = 3
    shell:
        """
        augur traits \
            --tree {input.tree} \
            --weights {input.weights} \
            --metadata {input.metadata} \
            --output {output.node_data} \
            --columns {params.columns} \
            --sampling-bias-correction {params.sampling_bias_correction} \
            --confidence
        """

# Run `augur export v1` on the refined tree, the metadata and the four
# node-data JSONs (branch lengths, traits, nucleotide and amino-acid
# mutations), then check the result with `augur validate export-v1`.
# Output paths are taken from `rule all` so they are defined in one place.
rule export_v1:
    message: "Exporting data files for auspice using nextflu compatible schema"
    input:
        tree = rules.refine.output.tree,
        metadata = rules.parse.output.metadata,
        branch_lengths = rules.refine.output.node_data,
        traits = rules.traits.output.node_data,
        nt_muts = rules.ancestral.output.node_data,
        aa_muts = rules.translate.output.node_data,
        colors = files.colors,
        auspice_config = files.auspice_config_v1
    output:
        auspice_tree = rules.all.input.auspice_v1_tree,
        auspice_meta = rules.all.input.auspice_v1_meta,
        auspice_seq = rules.all.input.auspice_v1_seq
    shell:
        # Two commands: the export itself, then validation of the meta and
        # tree JSONs it produced.
        """
        augur export v1 \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --node-data {input.branch_lengths} {input.traits} {input.nt_muts} {input.aa_muts} \
            --colors {input.colors} \
            --auspice-config {input.auspice_config} \
            --output-tree {output.auspice_tree} \
            --output-meta {output.auspice_meta} \
            --output-sequence {output.auspice_seq}
        augur validate export-v1 {output.auspice_meta} {output.auspice_tree}
        """

# Run `augur export v2` on the refined tree, the metadata and the four
# node-data JSONs (branch lengths, traits, nucleotide and amino-acid
# mutations), writing a single JSON whose path is taken from `rule all`.
#   params.title   dataset title passed to --title
rule export_v2:
    message: "Exporting data files for auspice using nextstrain schema v2"
    input:
        tree = rules.refine.output.tree,
        metadata = rules.parse.output.metadata,
        branch_lengths = rules.refine.output.node_data,
        traits = rules.traits.output.node_data,
        nt_muts = rules.ancestral.output.node_data,
        aa_muts = rules.translate.output.node_data,
        auspice_config = files.auspice_config_v2,
        colors = files.colors
    params:
        title = "Real-time tracking of Zika virus evolution -- v2 JSON"
    output:
        auspice_main = rules.all.input.auspice_v2
    shell:
        # `{params.title:q}` shell-quotes the title, which contains spaces.
        """
        augur export v2 \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --node-data {input.branch_lengths} {input.traits} {input.nt_muts} {input.aa_muts} \
            --colors {input.colors} \
            --auspice-config {input.auspice_config} \
            --output {output.auspice_main} \
            --title {params.title:q} \
            --panels tree map entropy frequencies
        """

# Run `augur frequencies --method kde` on the refined tree and the metadata,
# writing the tip-frequencies JSON whose path is taken from `rule all`.
#   params.pivot_interval   forwarded to --pivot-interval
rule tip_frequencies:
    # Added for consistency with the other build rules, which all announce
    # themselves with a message.
    message: "Estimating tip frequencies"
    input:
        tree = rules.refine.output.tree,
        metadata = rules.parse.output.metadata
    params:
        pivot_interval = 3
    output:
        tip_freq = rules.all.input.auspice_tip_frequencies
    shell:
        # The output is referenced by name so the command stays correct if
        # another output is ever added to this rule.
        """
        augur frequencies \
            --method kde \
            --tree {input.tree} \
            --metadata {input.metadata} \
            --pivot-interval {params.pivot_interval} \
            --output {output.tip_freq}
        """

# Copy the tip-frequencies JSON to the two prefixed file names (v1_ and v2_)
# listed in `rule all`.
rule copy_tip_frequencies:
    # Added for consistency with the other build rules, which all announce
    # themselves with a message.
    message: "Copying tip frequencies for the v1 and v2 exports"
    input:
        frequencies = rules.tip_frequencies.output.tip_freq
    output:
        v1 = rules.all.input.auspice_v1_tip_frequencies,
        v2 = rules.all.input.auspice_v2_tip_frequencies
    shell:
        # The input is referenced by name so `cp` always receives exactly one
        # source path, even if another input is added later.
        """
        cp -f {input.frequencies} {output.v1}
        cp -f {input.frequencies} {output.v2}
        """

# Delete the generated directories with `rm -rfv`. The positional params are
# the directory names; `{params}` expands to all of them, space-separated,
# in both the message and the command.
rule clean:
    message: "Removing directories: {params}"
    params:
        # No trailing whitespace in the names: it would end up verbatim in
        # the message and the command line.
        "results",
        "auspice"
    shell:
        "rm -rfv {params}"
