diff --git a/graphkb/constants.py b/graphkb/constants.py index 9f443bc..d8fd66e 100644 --- a/graphkb/constants.py +++ b/graphkb/constants.py @@ -60,8 +60,10 @@ ONCOKB_SOURCE_NAME = "oncokb" +TSO500_SOURCE_NAME = "tso500" ONCOGENE = "oncogenic" TUMOUR_SUPPRESSIVE = "tumour suppressive" +CANCER_GENE = "cancer gene" FUSION_NAMES = ["structural variant", "fusion"] PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST = ["cancer genome interpreter", "civic"] diff --git a/graphkb/genes.py b/graphkb/genes.py index 92279c2..7d5792b 100644 --- a/graphkb/genes.py +++ b/graphkb/genes.py @@ -4,6 +4,7 @@ from . import GraphKBConnection from .constants import ( BASE_THERAPEUTIC_TERMS, + CANCER_GENE, CHROMOSOMES, FAILED_REVIEW_STATUS, GENE_RETURN_PROPERTIES, @@ -12,6 +13,7 @@ PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST, PREFERRED_GENE_SOURCE, RELEVANCE_BASE_TERMS, + TSO500_SOURCE_NAME, TUMOUR_SUPPRESSIVE, ) from .match import get_equivalent_features @@ -20,25 +22,29 @@ from .vocab import get_terms_set -def _get_oncokb_gene_list( - conn: GraphKBConnection, relevance: str, ignore_cache: bool = False +def _get_tumourigenesis_genes_list( + conn: GraphKBConnection, + relevance: str, + sources: List[str], + ignore_cache: bool = False, ) -> List[Ontology]: - source = conn.get_source(ONCOKB_SOURCE_NAME)["@rid"] - statements = cast( List[Statement], conn.query( { "target": "Statement", - "filters": [ - {"source": source}, - {"relevance": {"target": "Vocabulary", "filters": {"name": relevance}}}, - ], + "filters": { + "AND": [ + {"source": {"target": "Source", "filters": {"name": sources}}}, + {"relevance": {"target": "Vocabulary", "filters": {"name": relevance}}}, + ] + }, "returnProperties": [f"subject.{prop}" for prop in GENE_RETURN_PROPERTIES], }, ignore_cache=ignore_cache, ), ) + genes: Dict[str, Ontology] = {} for statement in statements: @@ -58,7 +64,7 @@ def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - return _get_oncokb_gene_list(conn, ONCOGENE) + return _get_tumourigenesis_genes_list(conn, ONCOGENE, [ONCOKB_SOURCE_NAME]) def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: @@ -70,7 +76,21 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]: Returns: gene (Feature) records """ - return _get_oncokb_gene_list(conn, TUMOUR_SUPPRESSIVE) + return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, [ONCOKB_SOURCE_NAME]) + + +def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]: + """Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500. + + Args: + conn: the graphkb connection object + + Returns: + gene (Feature) records + """ + return _get_tumourigenesis_genes_list( + conn, CANCER_GENE, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME] + ) def get_therapeutic_associated_genes(graphkb_conn: GraphKBConnection) -> List[Ontology]: diff --git a/setup.cfg b/setup.cfg index c332460..ac8829a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ include_trailing_comma = true [metadata] name = graphkb url = https://github.com/bcgsc/pori_graphkb_python -version = 1.12.0 +version = 1.13.0 author_email = graphkb@bcgsc.ca description = python adapter for interacting with the GraphKB API long_description = file: README.md diff --git a/tests/test_genes.py b/tests/test_genes.py index ef88d14..00e391e 100644 --- a/tests/test_genes.py +++ b/tests/test_genes.py @@ -7,6 +7,7 @@ from graphkb import GraphKBConnection from graphkb.genes import ( + get_cancer_genes, get_cancer_predisposition_info, get_gene_information, get_genes_from_variant_types, @@ -22,6 +23,7 @@ CANONICAL_ONCOGENES = ["kras", "nras", "alk"] CANONICAL_TS = ["cdkn2a", "tp53"] +CANONICAL_CG = ["ercc1", "fanci", "h2bc4", "h2bc17", "acvr1b"] CANONICAL_FUSION_GENES = ["alk", "ewsr1", "fli1"] CANONICAL_STRUCTURAL_VARIANT_GENES = ["brca1", "dpyd", "pten"] CANNONICAL_THERAPY_GENES = ["erbb2", "brca2", "egfr"] @@ -112,6 +114,8 @@ def test_oncogene(conn): assert gene in names for gene in CANONICAL_TS: assert gene not in names + for gene in CANONICAL_CG: + assert gene not in names def test_tumour_supressors(conn): @@ -121,6 +125,19 @@ def test_tumour_supressors(conn): assert gene in names for gene in CANONICAL_ONCOGENES: assert gene not in names + for gene in CANONICAL_CG: + assert gene not in names + + +def test_cancer_genes(conn): + result = get_cancer_genes(conn) + names = {row["name"] for row in result} + for gene in CANONICAL_CG: + assert gene in names + for gene in CANONICAL_TS: + assert gene not in names + for gene in CANONICAL_ONCOGENES: + assert gene not in names def test_get_pharmacogenomic_info(conn):