Skip to content

Commit

Permalink
Merge pull request #559 from uclahs-cds/czhu-fix-call-noncoding
Browse files Browse the repository at this point in the history
Fix the problem of X in peptide sequence
  • Loading branch information
zhuchcn authored Aug 27, 2022
2 parents 2d4c108 + ca9b71a commit c56358a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 6 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]

## [0.9.3] - 2022-08-05
## [0.9.3] - 2022-08-26

### Fixed

Expand All @@ -22,6 +22,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

- Fixed issue where the `cpop_collapsed` attribute was not retained after merging, so peptides that don't end with cleavage sites were yielded. #554

- Fixed problem caused by N in the reference DNA sequence. #556

---

## [0.9.2] - 2022-07-29
Expand Down
7 changes: 4 additions & 3 deletions moPepGen/svgraph/VariantPeptideDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ def is_valid_seq(self, seq:Seq, blacklist:Set[str]) -> bool:
max_length = self.cleavage_params.max_length
min_mw = self.cleavage_params.min_mw

return seq not in blacklist and \
min_length <= len(seq) <= max_length and \
SeqUtils.molecular_weight(seq, 'protein') >= min_mw
return seq not in blacklist \
and min_length <= len(seq) <= max_length \
and 'X' not in seq \
and SeqUtils.molecular_weight(seq, 'protein') >= min_mw

def join_miscleaved_peptides(self, check_variants:bool,
additional_variants:List[VariantRecord], blacklist:Set[str],
Expand Down
2 changes: 1 addition & 1 deletion moPepGen/util/downsample_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def get_noncoding_translate(tx_id:str, anno:gtf.GenomicAnnotation,
orf = f"ORF{start}:{end}"
alt_protein_id = f"{protein_id}-{orf}"
alt_tx_id = f"{tx_id}-{orf}"
description = f"{alt_protein_id}|{alt_tx_id}|{gene_id}"
description = f"{alt_protein_id}|{alt_tx_id}|{gene_id}|-"
aa_seq.id = alt_protein_id
aa_seq.protein_id = alt_protein_id
aa_seq.transcript_id = alt_tx_id
Expand Down
13 changes: 12 additions & 1 deletion test/unit/test_variant_peptide_dict.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
""" Test Module for VariantPeptideDict """
import unittest
from test.unit import create_aa_record, create_variants
from moPepGen.svgraph.VariantPeptideDict import VariantPeptideDict, \
from moPepGen import params
from moPepGen.svgraph.VariantPeptideDict import MiscleavedNodes, VariantPeptideDict, \
VariantPeptideMetadata
import moPepGen.aa.VariantPeptideIdentifier as vpi

Expand Down Expand Up @@ -65,3 +66,13 @@ def test_get_peptide_sequences_circ_rna(self):
seqs = pool.get_peptide_sequences()
self.assertEqual({str(x.seq) for x in seqs}, {'SSSSSSSSSR'})
self.assertEqual(list(seqs)[0].description, 'CIRC-ENST0001-E1-E2-E3|1')


class TestCaseMiscleavedNodes(unittest.TestCase):
    """ Test cases for MiscleavedNodes """
    def test_is_valid_x(self):
        """ A sequence that contains the residue 'X' must be reported as
        invalid by `is_valid_seq`. """
        # Trypsin cleavage parameters; every other setting is left at the
        # CleavageParams defaults.
        trypsin_params = params.CleavageParams(enzyme='trypsin')
        # No miscleaved nodes are needed: only the sequence check is exercised.
        nodes = MiscleavedNodes([], trypsin_params)
        # An empty blacklist ensures the rejection cannot come from a
        # blacklist hit.
        empty_blacklist = set()
        is_valid = nodes.is_valid_seq('AAAAXAAA', empty_blacklist)
        self.assertFalse(is_valid)

0 comments on commit c56358a

Please sign in to comment.