Skip to content

Commit

Permalink
Merge pull request #562 from uclahs-cds/czhu-fix-call-variant-alt-splice
Browse files Browse the repository at this point in the history
Fixed alternative splicing deletion that starts at 3rd nucleotide of start codon
  • Loading branch information
zhuchcn authored Sep 7, 2022
2 parents d7f054d + 4b8dcd6 commit b716ecd
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]

## [0.9.4] - 2022-09-07

- Fixed an issue with alternative splicing deletions that start at the third nucleotide of the start codon. Those variants are now skipped. #560

## [0.9.3] - 2022-08-26

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion moPepGen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Iterable, IO


__version__ = '0.9.3'
__version__ = '0.9.4'

## Error messages
ERROR_INDEX_IN_INTRON = 'The genomic index seems to be in an intron'
Expand Down
5 changes: 5 additions & 0 deletions moPepGen/seqvar/VariantRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
SINGLE_NUCLEOTIDE_SUBSTITUTION = ['SNV', 'SNP', 'INDEL', 'RNAEditingSite']
ATTRS_POSITION = ['START', 'DONOR_START', 'ACCEPTER_START', 'ACCEPTER_POSITION']
ALTERNATIVE_SPLICING_TYPES = ['Insertion', 'Deletion', 'Substitution']
# Variant ID prefixes that VariantRecord.is_alternative_splicing() looks for.
# NOTE(review): presumably the rMATS event type codes -- confirm against the
# rMATS parser that assigns these IDs.
RMATS_TYPES = ['SE', 'RI', 'A3SS', 'A5SS', 'MXE']

class VariantRecord():
""" Defines the location, ref and alt of a genomic variant.
Expand Down Expand Up @@ -228,6 +229,10 @@ def is_fusion(self) -> bool:
""" Check if this is a fusion """
return self.type == 'Fusion'

def is_alternative_splicing(self) -> bool:
    """ Check if this is an alternative splicing event.

    Returns True when the variant ID begins with any of the prefixes
    listed in RMATS_TYPES, and False otherwise.
    """
    # str.startswith accepts a tuple of candidate prefixes, so a single
    # call covers every entry of RMATS_TYPES.
    return self.id.startswith(tuple(RMATS_TYPES))

def is_in_frame_fusion(self, anno:GenomicAnnotation):
""" Check if this is a in-frame fusion. A in-frame fusion is only when
both donor and accepter transcripts a protein coding (which known
Expand Down
3 changes: 2 additions & 1 deletion moPepGen/svgraph/ThreeFrameTVG.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,8 @@ def create_variant_graph(self, variants:List[seqvar.VariantRecord],

if variant.location.start == start_index - 1 \
and (variant.is_insertion() or variant.is_deletion()) \
and not variant.is_fusion():
and not variant.is_fusion() \
and not variant.is_alternative_splicing():
variant.to_end_inclusion(self.seq)

# Skip variants that the position is smaller than the first NT
Expand Down
23 changes: 23 additions & 0 deletions test/unit/test_three_frame_tvg.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,29 @@ def test_create_variant_graph_frameshifting(self):
expected = {'AAA', 'AA', 'A'}
self.assertEqual(received, expected)

def test_create_variant_graph_deletion_alt_splice(self):
    """ An alternative splicing deletion whose start is the third
    nucleotide of the start codon should be skipped, so the first node of
    every reading frame keeps its original sequence. """
    transcript_seq = 'AAATAAATAAAT'
    data = {
        1: ['AAATAAATAAAT', ['RF0'], [], 0],
        2: ['AATAAATAAAT', ['RF1'], [], 1],
        3: ['ATAAATAAAT', ['RF2'], [], 2]
    }

    graph, _ = create_three_frame_tvg(data, transcript_seq)
    # Declare a known ORF so the graph has a start codon for the variant
    # below (which starts at position 3) to collide with.
    graph.seq.orf = FeatureLocation(start=1, end=4)
    graph.has_known_orf = True

    # The 'SE' prefix of the ID is what flags the record as alternative
    # splicing.
    var_data = [
        (3, 10, 'T', '<DEL>', 'Deletion', 'SE-10')
    ]
    variants = create_variants(var_data)
    graph.create_variant_graph(variants, None, None, None)

    # Collect the sequence of the first node downstream of each reading
    # frame root.
    received = set()
    for root in graph.reading_frames:
        first_edge = list(root.out_edges)[0]
        received.add(str(first_edge.out_node.seq.seq))

    expected = {node_data[0] for node_data in data.values()}
    self.assertEqual(received, expected)

def test_apply_insertion_case1(self):
""" apply_insertion """
anno = create_genomic_annotation(ANNOTATION_DATA)
Expand Down

0 comments on commit b716ecd

Please sign in to comment.