From 8ffa95d0f3be8cd4e02696338e550f57ee663187 Mon Sep 17 00:00:00 2001
From: Dustin Bleile <dbleile@bcgsc.ca>
Date: Wed, 7 Jun 2023 10:56:20 -0700
Subject: [PATCH 1/6] lint - black -S -C -l 100 *.py graphkb tests

---
 graphkb/genes.py        | 12 ++----------
 graphkb/match.py        | 14 ++++----------
 graphkb/statement.py    |  4 +---
 graphkb/util.py         | 28 +++++-----------------------
 graphkb/vocab.py        | 15 +++------------
 tests/test_match.py     | 35 +++++++++--------------------------
 tests/test_statement.py |  6 +-----
 tests/test_util.py      |  6 +-----
 8 files changed, 26 insertions(+), 94 deletions(-)

diff --git a/graphkb/genes.py b/graphkb/genes.py
index 93bc62a..f270afa 100644
--- a/graphkb/genes.py
+++ b/graphkb/genes.py
@@ -238,10 +238,7 @@ def get_cancer_predisposition_info(conn: GraphKBConnection) -> Tuple[List[str],
                         "target": "Source",
                         "filters": {"@rid": get_rid(conn, "Source", "CGL")},
                     },
-                    "relevance": {
-                        "target": "Vocabulary",
-                        "filters": {"@rid": relevance_rids},
-                    },
+                    "relevance": {"target": "Vocabulary", "filters": {"@rid": relevance_rids}},
                 }
             ],
             "returnProperties": [
@@ -312,12 +309,7 @@ def get_pharmacogenomic_info(conn: GraphKBConnection) -> Tuple[List[str], Dict[s
         {
             "target": "Statement",
             "filters": [
-                {
-                    "relevance": {
-                        "target": "Vocabulary",
-                        "filters": {"@rid": relevance_rids},
-                    },
-                }
+                {"relevance": {"target": "Vocabulary", "filters": {"@rid": relevance_rids}}}
             ],
             "returnProperties": [
                 "conditions.@class",
diff --git a/graphkb/match.py b/graphkb/match.py
index a8dffc0..5f0e227 100644
--- a/graphkb/match.py
+++ b/graphkb/match.py
@@ -431,11 +431,7 @@ def match_positional_variant(
         gene1 = parsed['reference1']
 
     gene1_features = get_equivalent_features(
-        conn,
-        gene1,
-        source=gene_source,
-        is_source_id=gene_is_source_id,
-        ignore_cache=ignore_cache,
+        conn, gene1, source=gene_source, is_source_id=gene_is_source_id, ignore_cache=ignore_cache
     )
     features = convert_to_rid_list(gene1_features)
 
@@ -496,9 +492,7 @@ def match_positional_variant(
     ):
         # TODO: Check if variant and reference_variant should be interchanged
         if compare_positional_variants(
-            variant=parsed,
-            reference_variant=cast(PositionalVariant, row),
-            generic=True,
+            variant=parsed, reference_variant=cast(PositionalVariant, row), generic=True
         ):
             filtered_similarAndGeneric.append(row)
             if compare_positional_variants(
@@ -521,7 +515,7 @@ def match_positional_variant(
                     'returnProperties': POS_VARIANT_RETURN_PROPERTIES,
                 },
                 ignore_cache=ignore_cache,
-            ),
+            )
         )
 
     # disambiguate the variant type
@@ -597,7 +591,7 @@ def cat_variant_query(
                     'returnProperties': POS_VARIANT_RETURN_PROPERTIES,
                 },
                 ignore_cache=ignore_cache,
-            ),
+            )
         )
 
     result: Dict[str, Variant] = {}
diff --git a/graphkb/statement.py b/graphkb/statement.py
index 01bcd63..97f80ff 100644
--- a/graphkb/statement.py
+++ b/graphkb/statement.py
@@ -23,9 +23,7 @@ def categorize_relevance(
 
 
 def get_statements_from_variants(
-    graphkb_conn: GraphKBConnection,
-    variants: List[Variant],
-    failed_review: bool = False,
+    graphkb_conn: GraphKBConnection, variants: List[Variant], failed_review: bool = False
 ) -> List[Statement]:
     """Given a list of variant records from GraphKB, return related statements.
 
diff --git a/graphkb/util.py b/graphkb/util.py
index 88fb504..adb375d 100644
--- a/graphkb/util.py
+++ b/graphkb/util.py
@@ -130,12 +130,7 @@ def load(self) -> Optional[float]:
             )
         return None
 
-    def request(
-        self,
-        endpoint: str,
-        method: str = 'GET',
-        **kwargs,
-    ) -> Dict:
+    def request(self, endpoint: str, method: str = 'GET', **kwargs) -> Dict:
         """Request wrapper to handle adding common headers and logging.
 
         Args:
@@ -174,11 +169,7 @@ def request(
                 self.refresh_login()
                 self.request_count += 1
                 resp = requests.request(
-                    method,
-                    url,
-                    headers=self.headers,
-                    timeout=timeout,
-                    **kwargs,
+                    method, url, headers=self.headers, timeout=timeout, **kwargs
                 )
                 if resp.status_code == 401 or resp.status_code == 403:
                     logger.debug(f'/{endpoint} - {resp.status_code} - retrying')
@@ -276,10 +267,7 @@ def query(
                 return self.cache[hash_code]
 
         while True:
-            content = self.post(
-                'query',
-                data={**request_body, 'limit': limit, 'skip': len(result)},
-            )
+            content = self.post('query', data={**request_body, 'limit': limit, 'skip': len(result)})
             records = content['result']
             result.extend(records)
             if len(records) < limit or not paginate:
@@ -371,11 +359,7 @@ def stripRefSeq(breakRepr: str) -> str:
     return breakRepr
 
 
-def stripDisplayName(
-    displayName: str,
-    withRef: bool = True,
-    withRefSeq: bool = True,
-) -> str:
+def stripDisplayName(displayName: str, withRef: bool = True, withRefSeq: bool = True) -> str:
     match: object = re.search(r"^(.*)(\:)(.*)$", displayName)
     if match and not withRef:
         if withRefSeq:
@@ -409,9 +393,7 @@ def stripDisplayName(
 
 
 def stringifyVariant(
-    variant: Union[PositionalVariant, ParsedVariant],
-    withRef: bool = True,
-    withRefSeq: bool = True,
+    variant: Union[PositionalVariant, ParsedVariant], withRef: bool = True, withRefSeq: bool = True
 ) -> str:
     """
     Convert variant record to a string representation (displayName/hgvs)
diff --git a/graphkb/vocab.py b/graphkb/vocab.py
index 94fbdf9..c12e690 100644
--- a/graphkb/vocab.py
+++ b/graphkb/vocab.py
@@ -6,10 +6,7 @@
 
 
 def query_by_name(ontology_class: str, base_term_name: str) -> Dict:
-    return {
-        'target': ontology_class,
-        'filters': {'name': base_term_name},
-    }
+    return {'target': ontology_class, 'filters': {'name': base_term_name}}
 
 
 def get_equivalent_terms(
@@ -53,10 +50,7 @@ def get_equivalent_terms(
             convert_to_rid_list(
                 conn.query(
                     {
-                        'target': {
-                            'target': root_records,
-                            'queryType': 'descendants',
-                        },
+                        'target': {'target': root_records, 'queryType': 'descendants'},
                         'queryType': 'similarTo',
                         'treeEdges': [],
                         'returnProperties': [
@@ -107,10 +101,7 @@ def get_term_tree(
         List[Ontology],
         conn.query(
             {
-                'target': {
-                    'target': base_records,
-                    'queryType': 'ancestors',
-                },
+                'target': {'target': base_records, 'queryType': 'ancestors'},
                 'queryType': 'similarTo',
                 'treeEdges': [],
                 'returnProperties': ['sourceId', 'sourceIdVersion', 'deprecated', 'name', '@rid'],
diff --git a/tests/test_match.py b/tests/test_match.py
index f6498f5..580f045 100644
--- a/tests/test_match.py
+++ b/tests/test_match.py
@@ -147,10 +147,7 @@ def test_low_gain_excludes_amplification(self, conn):
             assert not has_prefix(variant_type, DECREASE_PREFIXES)
 
 
-@pytest.mark.parametrize(
-    'pos1,pos2_start,pos2_end',
-    [[3, 2, 5], [2, None, 5], [3, 2, None]],
-)
+@pytest.mark.parametrize('pos1,pos2_start,pos2_end', [[3, 2, 5], [2, None, 5], [3, 2, None]])
 def test_range_overlap(pos1, pos2_start, pos2_end):
     assert match.positions_overlap({'pos': pos1}, {'pos': pos2_start}, {'pos': pos2_end})
 
@@ -274,18 +271,15 @@ def test_nonspecific_refseq(self):
     def test_ambiguous_refseq(self, seq1, seq2):
         # ambiguous AA matches anything the same length
         assert match.compare_positional_variants(
-            {'break1Start': {'pos': 1}, 'refSeq': seq1},
-            {'break1Start': {'pos': 1}, 'refSeq': seq2},
+            {'break1Start': {'pos': 1}, 'refSeq': seq1}, {'break1Start': {'pos': 1}, 'refSeq': seq2}
         )
 
     def test_refseq_length_mismatch(self):
         assert not match.compare_positional_variants(
-            {'break1Start': {'pos': 1}, 'refSeq': '??'},
-            {'break1Start': {'pos': 1}, 'refSeq': 'T'},
+            {'break1Start': {'pos': 1}, 'refSeq': '??'}, {'break1Start': {'pos': 1}, 'refSeq': 'T'}
         )
         assert not match.compare_positional_variants(
-            {'break1Start': {'pos': 1}, 'refSeq': '?'},
-            {'break1Start': {'pos': 1}, 'refSeq': 'TT'},
+            {'break1Start': {'pos': 1}, 'refSeq': '?'}, {'break1Start': {'pos': 1}, 'refSeq': 'TT'}
         )
 
     def test_diff_altseq(self):
@@ -302,14 +296,12 @@ def test_same_altseq_matches(self):
 
     def test_diff_refseq(self):
         assert not match.compare_positional_variants(
-            {'break1Start': {'pos': 1}, 'refSeq': 'M'},
-            {'break1Start': {'pos': 1}, 'refSeq': 'R'},
+            {'break1Start': {'pos': 1}, 'refSeq': 'M'}, {'break1Start': {'pos': 1}, 'refSeq': 'R'}
         )
 
     def test_same_refseq_matches(self):
         assert match.compare_positional_variants(
-            {'break1Start': {'pos': 1}, 'refSeq': 'R'},
-            {'break1Start': {'pos': 1}, 'refSeq': 'R'},
+            {'break1Start': {'pos': 1}, 'refSeq': 'R'}, {'break1Start': {'pos': 1}, 'refSeq': 'R'}
         )
 
     def test_range_vs_sub(self):
@@ -352,24 +344,15 @@ def test_error_on_duplicate_reference2(self, conn):
 
     def test_uncertain_position_not_supported(self, conn):
         with pytest.raises(NotImplementedError):
-            match.match_positional_variant(
-                conn,
-                '(BCR,ABL1):fusion(e.13_24,e.3)',
-            )
+            match.match_positional_variant(conn, '(BCR,ABL1):fusion(e.13_24,e.3)')
 
     def test_bad_gene_name(self, conn):
         with pytest.raises(FeatureNotFoundError):
-            match.match_positional_variant(
-                conn,
-                'ME-AS-A-GENE:p.G12D',
-            )
+            match.match_positional_variant(conn, 'ME-AS-A-GENE:p.G12D')
 
     def test_bad_gene2_name(self, conn):
         with pytest.raises(FeatureNotFoundError):
-            match.match_positional_variant(
-                conn,
-                '(BCR,ME-AS-A-GENE):fusion(e.13,e.3)',
-            )
+            match.match_positional_variant(conn, '(BCR,ME-AS-A-GENE):fusion(e.13,e.3)')
 
     def test_match_explicit_reference1(self, conn):
         reference1 = conn.query({'target': 'Feature', 'filters': {'name': 'KRAS'}})[0]['@rid']
diff --git a/tests/test_statement.py b/tests/test_statement.py
index 6d7a01d..76264e0 100644
--- a/tests/test_statement.py
+++ b/tests/test_statement.py
@@ -89,10 +89,6 @@ def test_custom_categories(self, graphkb_conn):
 @pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
 class TestStatementMatch:
     def test_truncating_categories(self, conn):
-        variant = {
-            '@class': 'CategoryVariant',
-            '@rid': '#161:429',
-            'displayName': 'RB1 truncating',
-        }
+        variant = {'@class': 'CategoryVariant', '@rid': '#161:429', 'displayName': 'RB1 truncating'}
         statements = statement.get_statements_from_variants(conn, [variant])
         assert statements
diff --git a/tests/test_util.py b/tests/test_util.py
index cf15f3e..05f0af2 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -46,11 +46,7 @@ def test_convert_aa_3to1(input, result):
 
 class TestOntologyTermRepr:
     @pytest.mark.parametrize(
-        'termStr,termRepr',
-        [
-            ['missense mutation', 'missense mutation'],
-            ['', ''],
-        ],
+        'termStr,termRepr', [['missense mutation', 'missense mutation'], ['', '']]
     )
     def test_ontologyTermRepr_str(self, termStr, termRepr):
         assert util.ontologyTermRepr(termStr) == termRepr

From 2ab45566218a4f4d93b12c7dd3923cd3232d4e6d Mon Sep 17 00:00:00 2001
From: Dustin Bleile <dbleile@bcgsc.ca>
Date: Wed, 7 Jun 2023 10:56:48 -0700
Subject: [PATCH 2/6] lint - isort

---
 graphkb/constants.py    | 1 -
 graphkb/match.py        | 2 +-
 graphkb/util.py         | 2 +-
 tests/test_statement.py | 2 +-
 tests/test_util.py      | 6 ++++--
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/graphkb/constants.py b/graphkb/constants.py
index 1fb76c5..55f7e26 100644
--- a/graphkb/constants.py
+++ b/graphkb/constants.py
@@ -1,5 +1,4 @@
 import argparse
-
 from typing import Dict
 
 from .types import CategoryBaseTermMapping
diff --git a/graphkb/match.py b/graphkb/match.py
index 5f0e227..829f75a 100644
--- a/graphkb/match.py
+++ b/graphkb/match.py
@@ -13,8 +13,8 @@
 )
 from .types import BasicPosition, Ontology, ParsedVariant, PositionalVariant, Record, Variant
 from .util import (
-    convert_to_rid_list,
     FeatureNotFoundError,
+    convert_to_rid_list,
     logger,
     looks_like_rid,
     stringifyVariant,
diff --git a/graphkb/util.py b/graphkb/util.py
index adb375d..506822e 100644
--- a/graphkb/util.py
+++ b/graphkb/util.py
@@ -10,7 +10,7 @@
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
 
-from .constants import DEFAULT_LIMIT, DEFAULT_URL, AA_3to1_MAPPING, TYPES_TO_NOTATION
+from .constants import DEFAULT_LIMIT, DEFAULT_URL, TYPES_TO_NOTATION, AA_3to1_MAPPING
 from .types import OntologyTerm, ParsedVariant, PositionalVariant, Record
 
 QUERY_CACHE: Dict[Any, Any] = {}
diff --git a/tests/test_statement.py b/tests/test_statement.py
index 76264e0..b4883e5 100644
--- a/tests/test_statement.py
+++ b/tests/test_statement.py
@@ -1,6 +1,6 @@
+import os
 from unittest.mock import Mock
 
-import os
 import pytest
 
 from graphkb import statement
diff --git a/tests/test_util.py b/tests/test_util.py
index 05f0af2..e05388b 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -1,6 +1,8 @@
-import pytest
 import os
-from graphkb import util, GraphKBConnection
+
+import pytest
+
+from graphkb import GraphKBConnection, util
 
 
 class OntologyTerm:

From dc84a38af4a50e7cd0fb330a4a9b36d0c0b19603 Mon Sep 17 00:00:00 2001
From: Dustin Bleile <dbleile@bcgsc.ca>
Date: Wed, 7 Jun 2023 12:17:37 -0700
Subject: [PATCH 3/6] lint - flake8 warnings

---
 graphkb/__init__.py     | 4 ++--
 graphkb/util.py         | 2 +-
 tests/test_genes.py     | 1 -
 tests/test_statement.py | 4 +---
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/graphkb/__init__.py b/graphkb/__init__.py
index 54ee098..a6fdd66 100644
--- a/graphkb/__init__.py
+++ b/graphkb/__init__.py
@@ -1,2 +1,2 @@
-from .constants import DEFAULT_URL
-from .util import GraphKBConnection, logger
+from .constants import DEFAULT_URL  # noqa: F401
+from .util import GraphKBConnection, logger  # noqa: F401
diff --git a/graphkb/util.py b/graphkb/util.py
index 506822e..f4327fd 100644
--- a/graphkb/util.py
+++ b/graphkb/util.py
@@ -498,7 +498,7 @@ def stringifyVariant(
             if withRefSeq:
                 result.append(f"del{refSeq}ins")
             else:
-                result.append(f"delins")
+                result.append("delins")
         else:
             result.append(notationType)
         if truncation and truncation != 1:
diff --git a/tests/test_genes.py b/tests/test_genes.py
index 51b2a7c..ac10bc4 100644
--- a/tests/test_genes.py
+++ b/tests/test_genes.py
@@ -6,7 +6,6 @@
 import pytest
 
 from graphkb import GraphKBConnection
-from graphkb.constants import FUSION_NAMES
 from graphkb.genes import (
     get_cancer_predisposition_info,
     get_genes_from_variant_types,
diff --git a/tests/test_statement.py b/tests/test_statement.py
index b4883e5..2c7b8e7 100644
--- a/tests/test_statement.py
+++ b/tests/test_statement.py
@@ -36,9 +36,7 @@ def term_tree_calls(*final_values):
 
     query_mock = Mock()
     query_mock.side_effect = return_values
-    conn = Mock(query=query_mock, cache={})
-
-    return conn
+    return Mock(query=query_mock, cache={})
 
 
 class TestCategorizeRelevance:

From 6cd489c71e3f1ba7fd6dfb262502c193c2c7fb79 Mon Sep 17 00:00:00 2001
From: Dustin Bleile <dbleile@bcgsc.ca>
Date: Wed, 7 Jun 2023 12:23:14 -0700
Subject: [PATCH 4/6] lint - genes.py docstring fixes.

---
 graphkb/genes.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/graphkb/genes.py b/graphkb/genes.py
index f270afa..4bbe9d6 100644
--- a/graphkb/genes.py
+++ b/graphkb/genes.py
@@ -1,6 +1,4 @@
-"""
-Methods for retrieving gene annotation lists from GraphKB
-"""
+"""Methods for retrieving gene annotation lists from GraphKB."""
 from typing import Any, Dict, List, Tuple, cast
 
 from . import GraphKBConnection
@@ -50,7 +48,7 @@ def _get_oncokb_gene_list(
 
 
 def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]:
-    """Gets the list of oncogenes stored in GraphKB derived from OncoKB.
+    """Get the list of oncogenes stored in GraphKB derived from OncoKB.
 
     Args:
         conn: the graphkb connection object
@@ -62,7 +60,7 @@ def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]:
 
 
 def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]:
-    """Gets the list of tumour supressor genes stored in GraphKB derived from OncoKB.
+    """Get the list of tumour suppressor genes stored in GraphKB derived from OncoKB.
 
     Args:
         conn: the graphkb connection object
@@ -116,8 +114,7 @@ def get_genes_from_variant_types(
     source_record_ids: List[str] = [],
     ignore_cache: bool = False,
 ) -> List[Ontology]:
-    """
-    Retrieve a list of Genes which are found in variants on the given types
+    """Retrieve a list of Genes which are found in variants on the given types.
 
     Args:
         conn: the graphkb connection object

From a8ccc0b9afef909c7d0c4536b7d9b2fe7fe63557 Mon Sep 17 00:00:00 2001
From: dustinbleile <dustinbleile@gmail.com>
Date: Thu, 8 Jun 2023 15:50:27 -0700
Subject: [PATCH 5/6] GERO-337 - improvement - add get_gene_information to
 genes.py - originally from pori_ipr_python.

---
 graphkb/genes.py    | 106 ++++++++++++++++++++++++++++++++++++++++----
 tests/test_genes.py |  53 ++++++++++++++++++++++
 2 files changed, 150 insertions(+), 9 deletions(-)

diff --git a/graphkb/genes.py b/graphkb/genes.py
index 4bbe9d6..9965b47 100644
--- a/graphkb/genes.py
+++ b/graphkb/genes.py
@@ -1,10 +1,12 @@
 """Methods for retrieving gene annotation lists from GraphKB."""
-from typing import Any, Dict, List, Tuple, cast
+from typing import Any, Dict, List, Sequence, Set, Tuple, cast
 
 from . import GraphKBConnection
 from .constants import (
     BASE_THERAPEUTIC_TERMS,
     CHROMOSOMES,
+    FAILED_REVIEW_STATUS,
+    GENERIC_RETURN_PROPERTIES,
     GENE_RETURN_PROPERTIES,
     ONCOGENE,
     ONCOKB_SOURCE_NAME,
@@ -124,14 +126,18 @@ def get_genes_from_variant_types(
     Returns:
         List.<dict>: gene (Feature) records
     """
+    filters: List[Dict[str, Any]] = []
+    if types:
+        filters.append(
+            {'type': {'target': 'Vocabulary', 'filters': {'name': types, 'operator': 'IN'}}}
+        )
+
     variants = cast(
         List[Variant],
         conn.query(
             {
                 'target': 'Variant',
-                'filters': [
-                    {'type': {'target': 'Vocabulary', 'filters': {'name': types, 'operator': 'IN'}}}
-                ],
+                'filters': filters,
                 'returnProperties': ['reference1', 'reference2'],
             },
             ignore_cache=ignore_cache,
@@ -139,20 +145,17 @@ def get_genes_from_variant_types(
     )
 
     genes = set()
-
     for variant in variants:
         genes.add(variant['reference1'])
-
         if variant['reference2']:
             genes.add(variant['reference2'])
+    if not genes:
+        return []
 
     filters: List[Dict[str, Any]] = [{'biotype': 'gene'}]
-
     if source_record_ids:
         filters.append({'source': source_record_ids, 'operator': 'IN'})
 
-    if not genes:
-        return []
     result = cast(
         List[Ontology],
         conn.query(
@@ -351,3 +354,88 @@ def get_pharmacogenomic_info(conn: GraphKBConnection) -> Tuple[List[str], Dict[s
         logger.error(f"Unable to find gene for '{name}' ({biotype})")
 
     return sorted(genes), variants
+
+
+def convert_to_rid_set(records: Sequence[Dict]) -> Set[str]:
+    return {record['@rid'] for record in records}  # unique '@rid' values of the given records
+
+
+def get_gene_information(
+    graphkb_conn: GraphKBConnection, gene_names: Sequence[str]
+) -> List[Dict[str, Any]]:
+    """Create a list of gene_info flag dicts for IPR report upload.
+
+    Function is originally from pori_ipr_python::annotate.py
+
+    Gene flags (categories) are: ['cancerRelated', 'knownFusionPartner', 'knownSmallMutation',
+                                  'oncogene', 'therapeuticAssociated', 'tumourSuppressor']
+
+    Args:
+        graphkb_conn: the graphkb connection object
+        gene_names: names of the genes to build gene_info rows for (duplicates are ignored)
+    Returns:
+        List of gene_info dicts of form [{'name':<gene_str>, <flag>: True}], sorted by name.
+        False flags are omitted to reduce info transfer; genes with no flag get no dict at all.
+            eg. [{'cancerRelated': True,
+                  'knownFusionPartner': True,
+                  'knownSmallMutation': True,
+                  'name': 'TERT',
+                  'oncogene': True}]
+    """
+    logger.info('fetching variant related genes list')
+    # For query speed, only fetch the minimum needed details
+    ret_props = [
+        'conditions.@rid',
+        'conditions.@class',
+        'conditions.reference1',
+        'conditions.reference2',
+        'reviewStatus',
+    ]
+    body: Dict[str, Any] = {'target': 'Statement', 'returnProperties': ret_props}
+
+    gene_names = sorted(set(gene_names))
+    statements = graphkb_conn.query(body)
+    statements = [s for s in statements if s.get('reviewStatus') != FAILED_REVIEW_STATUS]
+
+    gene_flags: Dict[str, Set[str]] = {
+        'cancerRelated': set(),
+        'knownFusionPartner': set(),
+        'knownSmallMutation': set(),
+    }
+
+    for statement in statements:
+        for condition in statement['conditions']:
+            if not condition.get('reference1'):
+                continue
+            gene_flags['cancerRelated'].add(condition['reference1'])
+            if condition.get('reference2'):  # second reference: both genes are fusion partners
+                gene_flags['cancerRelated'].add(condition['reference2'])
+                gene_flags['knownFusionPartner'].add(condition['reference1'])
+                gene_flags['knownFusionPartner'].add(condition['reference2'])
+            elif condition['@class'] == 'PositionalVariant':
+                gene_flags['knownSmallMutation'].add(condition['reference1'])
+
+    logger.info('fetching oncogenes list')
+    gene_flags['oncogene'] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn))
+    logger.info('fetching tumour supressors list')
+    gene_flags['tumourSuppressor'] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn))
+
+    logger.info('fetching therapeutic associated genes lists')
+    gene_flags['therapeuticAssociated'] = convert_to_rid_set(
+        get_therapeutic_associated_genes(graphkb_conn)
+    )
+
+    logger.info(f"Setting gene_info flags on {len(gene_names)} genes")
+    result: List[Dict[str, Any]] = []
+    for gene_name in gene_names:
+        equivalent = convert_to_rid_set(get_equivalent_features(graphkb_conn, gene_name))
+        row: Dict[str, Any] = {'name': gene_name}
+        for flag, flagged_rids in gene_flags.items():
+            # make smaller JSON to upload since all default to false already
+            if equivalent.intersection(flagged_rids):
+                row[flag] = True
+        # a row holding nothing but the name has no flag set, so it is not reported
+        if len(row) > 1:
+            result.append(row)
+
+    return result
diff --git a/tests/test_genes.py b/tests/test_genes.py
index ac10bc4..12e1e9d 100644
--- a/tests/test_genes.py
+++ b/tests/test_genes.py
@@ -9,6 +9,7 @@
 from graphkb.genes import (
     get_cancer_predisposition_info,
     get_genes_from_variant_types,
+    get_gene_information,
     get_oncokb_oncogenes,
     get_oncokb_tumour_supressors,
     get_pharmacogenomic_info,
@@ -177,3 +178,55 @@ def test_get_therapeutic_associated_genes(conn):
     names = {row['name'] for row in gene_list}
     for gene in CANNONICAL_THERAPY_GENES + CANONICAL_ONCOGENES + CANONICAL_TS:
         assert gene in names, f"{gene} not found by get_therapeutic_associated_genes"
+
+
+@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="excluding long running integration tests")
+def test_get_gene_information(conn):
+    """Check the gene_info flags returned for the canonical gene lists.
+
+    Every canonical gene must be returned with the flag of the list it comes from, all of
+    them must also be flagged cancerRelated, and a name that is not a gene must get no row.
+    """
+    # the concatenation is reused below for the cancerRelated check
+    canonical_genes = (
+        CANONICAL_ONCOGENES
+        + CANONICAL_TS
+        + CANONICAL_FUSION_GENES
+        + CANONICAL_STRUCTURAL_VARIANT_GENES
+        + CANNONICAL_THERAPY_GENES
+    )
+    gene_info = get_gene_information(conn, canonical_genes + ['notagenename'])
+    assert gene_info
+    # the made-up name must not come back in any row
+    nongene_flagged = [g['name'] for g in gene_info if g['name'] == 'notagenename']
+    assert not nongene_flagged, f"Improper gene category: {nongene_flagged}"
+
+    def names_with(flag):
+        # Built once per flag instead of once per asserted gene; a set keeps the
+        # membership checks below cheap.
+        return {g['name'] for g in gene_info if g.get(flag)}
+
+    oncogenes = names_with('oncogene')
+    for gene in CANONICAL_ONCOGENES:
+        assert gene in oncogenes, f"Missed oncogene {gene}"
+
+    tumour_suppressors = names_with('tumourSuppressor')
+    for gene in CANONICAL_TS:
+        assert gene in tumour_suppressors, f"Missed 'tumourSuppressor' {gene}"
+
+    fusion_partners = names_with('knownFusionPartner')
+    for gene in CANONICAL_FUSION_GENES:
+        assert gene in fusion_partners, f"Missed knownFusionPartner {gene}"
+
+    small_mutations = names_with('knownSmallMutation')
+    for gene in CANONICAL_STRUCTURAL_VARIANT_GENES:
+        assert gene in small_mutations, f"Missed knownSmallMutation {gene}"
+
+    therapy_genes = names_with('therapeuticAssociated')
+    for gene in CANNONICAL_THERAPY_GENES:
+        assert gene in therapy_genes, f"Missed therapeuticAssociated {gene}"
+
+    # whichever list it came from, every canonical gene must be cancerRelated
+    cancer_related = names_with('cancerRelated')
+    for gene in canonical_genes:
+        assert gene in cancer_related, f"Missed cancerRelated {gene}"

From 422378591e060382ef0db807f24c0bd595f8d71e Mon Sep 17 00:00:00 2001
From: Dustin Bleile <dbleile@bcgsc.ca>
Date: Tue, 13 Jun 2023 12:56:01 -0700
Subject: [PATCH 6/6] Release v1.11.0 New Feature:  * GERO-337 -
 get_gene_information function added. Improvements:  * update docstrings and
 import orders

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 764caa9..c623b52 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,7 +10,7 @@ include_trailing_comma = true
 [metadata]
 name = graphkb
 url = https://github.com/bcgsc/pori_graphkb_python
-version = 1.10.2
+version = 1.11.0
 author_email = graphkb@bcgsc.ca
 description = python adapter for interacting with the GraphKB API
 long_description = file: README.md