Skip to content

Commit

Permalink
MAINT: move partitioning and collating actions to q2-types (#175)
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch authored Sep 20, 2024
1 parent e1ae7f1 commit b56f64b
Show file tree
Hide file tree
Showing 41 changed files with 72 additions and 731 deletions.
5 changes: 2 additions & 3 deletions q2_moshpit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from . import abundance
from . import busco
from . import eggnog
from . import partition
from . import prodigal
from ._version import get_versions
from .dereplication import dereplicate_mags
Expand All @@ -31,8 +30,8 @@
__all__ = [
'metabat2', 'bracken', 'kraken_class', 'kraken_db',
'kaiju_class', 'kaiju_db', 'dereplicate_mags', 'eggnog',
'busco', 'prodigal', 'kraken_helpers', 'partition',
'filter_derep_mags', 'filter_mags', 'get_feature_lengths',
'busco', 'prodigal', 'kraken_helpers', 'filter_derep_mags',
'filter_mags', 'get_feature_lengths',
'multiply_tables', '_multiply_tables', '_multiply_tables_pa',
'_multiply_tables_relative', 'abundance', 'filter_reads_pangenome'
]
4 changes: 3 additions & 1 deletion q2_moshpit/busco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

from .busco import evaluate_busco, _evaluate_busco, _visualize_busco
from .database import fetch_busco_db
from .partition import collate_busco_results

__all__ = [
"evaluate_busco", "_evaluate_busco", "_visualize_busco", "fetch_busco_db"
"evaluate_busco", "_evaluate_busco", "_visualize_busco", "fetch_busco_db",
"collate_busco_results"
]
2 changes: 1 addition & 1 deletion q2_moshpit/busco/busco.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def evaluate_busco(
partition_action = "partition_sample_data_mags"
else:
partition_action = "partition_feature_data_mags"
partition_mags = ctx.get_action("moshpit", partition_action)
partition_mags = ctx.get_action("types", partition_action)

(partitioned_mags, ) = partition_mags(bins, num_partitions)
results = []
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f SRR13221817 bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f.fasta bacteria_odb10 28.2 27.4 0.8 8.9 62.9 124 4785 4785 0.000% 265 1219165
5978e667-0476-4921-8cc2-34b9d1b508c1 SRR13221817 5978e667-0476-4921-8cc2-34b9d1b508c1.fasta bacteria_odb10 1.6 1.6 0.0 1.6 96.8 124 3548 3548 0.000% 67 245922
625c95e6-ac2f-4e6e-9470-af8cd11c75dd SRR13221817 625c95e6-ac2f-4e6e-9470-af8cd11c75dd.fasta bacteria_odb10 26.6 26.6 0.0 3.2 70.2 124 78679 78679 0.000% 17 714893
6ed8c097-1c87-4019-8b38-b95507011b41 SRR14143412 6ed8c097-1c87-4019-8b38-b95507011b41.fasta bacteria_odb10 8.1 8.1 0.0 0.8 91.1 124 74198 74198 0.000% 11 560715
bf2c0af0-83ba-44a6-b550-3b7884a62a82 SRR14143412 bf2c0af0-83ba-44a6-b550-3b7884a62a82.fasta bacteria_odb10 95.9 93.5 2.4 2.4 1.7 124 80820 80820 0.000% 92 4253319
a2401d15-802f-42c3-9eb4-c282e2141b14 SRR14143412 a2401d15-802f-42c3-9eb4-c282e2141b14.fasta bacteria_odb10 89.5 89.5 0.0 1.6 8.9 124 31708 31708 0.000% 106 2120157
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f SRR13221817 bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f.fasta bacteria_odb10 28.2 27.4 0.8 8.9 62.9 124 4785 4785 0.000% 265 1219165
5978e667-0476-4921-8cc2-34b9d1b508c1 SRR13221817 5978e667-0476-4921-8cc2-34b9d1b508c1.fasta bacteria_odb10 1.6 1.6 0.0 1.6 96.8 124 3548 3548 0.000% 67 245922
625c95e6-ac2f-4e6e-9470-af8cd11c75dd SRR13221817 625c95e6-ac2f-4e6e-9470-af8cd11c75dd.fasta bacteria_odb10 26.6 26.6 0.0 3.2 70.2 124 78679 78679 0.000% 17 714893
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
6ed8c097-1c87-4019-8b38-b95507011b41 SRR14143412 6ed8c097-1c87-4019-8b38-b95507011b41.fasta bacteria_odb10 8.1 8.1 0.0 0.8 91.1 124 74198 74198 0.000% 11 560715
bf2c0af0-83ba-44a6-b550-3b7884a62a82 SRR14143412 bf2c0af0-83ba-44a6-b550-3b7884a62a82.fasta bacteria_odb10 95.9 93.5 2.4 2.4 1.7 124 80820 80820 0.000% 92 4253319
a2401d15-802f-42c3-9eb4-c282e2141b14 SRR14143412 a2401d15-802f-42c3-9eb4-c282e2141b14.fasta bacteria_odb10 89.5 89.5 0.0 1.6 8.9 124 31708 31708 0.000% 106 2120157
36 changes: 36 additions & 0 deletions q2_moshpit/busco/tests/test_partition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os

import pandas as pd
from qiime2.plugin.testing import TestPluginBase

from q2_moshpit.busco.partition import collate_busco_results
from q2_moshpit.busco.types import BUSCOResultsDirectoryFormat


class TestBUSCOPlots(TestPluginBase):
    """Tests for collating partitioned BUSCO results."""

    package = "q2_moshpit.busco.tests"

    def test_collate_busco_results(self):
        """Collate two BUSCO result directories and compare to the fixture.

        The ``sample1`` and ``sample2`` result directories are passed to
        ``collate_busco_results`` and the ``busco_results.tsv`` file found in
        the returned directory is compared against the expected
        ``busco_results/collated/busco_results.tsv`` fixture.
        """
        busco_results = [
            BUSCOResultsDirectoryFormat(
                self.get_data_path(f"busco_results/{sample}"), mode="r"
            )
            for sample in ("sample1", "sample2")
        ]

        collated_busco_result = collate_busco_results(busco_results)

        # The files carry a .tsv extension, so parse them as tab-delimited.
        # With the default comma separator each row is read as one string
        # column, which hides which field differs when the test fails.
        obs = pd.read_csv(
            os.path.join(str(collated_busco_result), "busco_results.tsv"),
            sep="\t",
        )
        exp = pd.read_csv(
            self.get_data_path("busco_results/collated/busco_results.tsv"),
            sep="\t",
        )

        pd.testing.assert_frame_equal(obs, exp)
6 changes: 4 additions & 2 deletions q2_moshpit/eggnog/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ def eggnog_annotate(
num_partitions=None
):
_eggnog_annotate = ctx.get_action("moshpit", "_eggnog_annotate")
collate_annotations = ctx.get_action("moshpit", "collate_annotations")
collate_annotations = ctx.get_action(
"types", "collate_ortholog_annotations"
)

if eggnog_hits.type <= SampleData[Orthologs]:
partition_method = ctx.get_action("moshpit", "partition_orthologs")
partition_method = ctx.get_action("types", "partition_orthologs")
else:
raise NotImplementedError()

Expand Down
6 changes: 3 additions & 3 deletions q2_moshpit/eggnog/orthologs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,17 @@ def _run_eggnog_search_pipeline(
- collated_tables: The collated feature tables.
"""
if sequences.type <= FeatureData[MAG]:
plugin, action_name = "moshpit", "partition_feature_data_mags"
plugin, action_name = "types", "partition_feature_data_mags"
elif sequences.type <= SampleData[Contigs]:
plugin, action_name = "assembly", "partition_contigs"
elif sequences.type <= SampleData[MAGs]:
plugin, action_name = "moshpit", "partition_sample_data_mags"
plugin, action_name = "types", "partition_sample_data_mags"
else:
raise NotImplementedError()

partition_method = ctx.get_action(plugin, action_name)
_eggnog_search = ctx.get_action("moshpit", search_action)
collate_hits = ctx.get_action("moshpit", "collate_orthologs")
collate_hits = ctx.get_action("types", "collate_orthologs")
_eggnog_feature_table = ctx.get_action("moshpit", "_eggnog_feature_table")
(partitioned_sequences,) = partition_method(sequences, num_partitions)

Expand Down
2 changes: 1 addition & 1 deletion q2_moshpit/kraken2/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def classify_kraken2(
partition_method = ctx.get_action("assembly", "partition_contigs")
elif seqs.type <= SampleData[MAGs]:
partition_method = ctx.get_action(
"moshpit", "partition_sample_data_mags"
"types", "partition_sample_data_mags"
)
# FeatureData[MAG] is not parallelized
elif seqs.type <= FeatureData[MAG]:
Expand Down
24 changes: 0 additions & 24 deletions q2_moshpit/partition/__init__.py

This file was deleted.

23 changes: 0 additions & 23 deletions q2_moshpit/partition/annotations.py

This file was deleted.

141 changes: 0 additions & 141 deletions q2_moshpit/partition/mags.py

This file was deleted.

Loading

0 comments on commit b56f64b

Please sign in to comment.