Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Utility class #39

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ v0.4

- Disabling ``mv_store`` feature of H2.
- Fixing various bugs in ``H2ScoreDistributionReader`` and ``H2ScoreDistributionWriter``.
- Adding class ``OntologyAlgorithm`` with test class ``OntologyAlgorithmTest``. Implements functions to get children, parents, descendants and ancestors.

----
v0.3
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package com.github.phenomics.ontolib.ontology.algo;


import com.github.phenomics.ontolib.formats.hpo.HpoFrequency;
import com.github.phenomics.ontolib.formats.hpo.HpoFrequencyTermIds;
import com.github.phenomics.ontolib.formats.hpo.HpoModeOfInheritanceTermIds;
import com.github.phenomics.ontolib.formats.hpo.HpoSubOntologyRootTermIds;
import com.github.phenomics.ontolib.graph.algo.BreadthFirstSearch;
import com.github.phenomics.ontolib.graph.data.DirectedGraph;
import com.github.phenomics.ontolib.graph.data.Edge;
import com.github.phenomics.ontolib.graph.data.ImmutableEdge;
import com.github.phenomics.ontolib.ontology.data.Ontology;
import com.github.phenomics.ontolib.ontology.data.TermId;
import com.google.common.collect.ImmutableSet;

import java.util.*;

/**
 * Implementation of several commonly needed algorithms for traversing and searching in
 * an {@link com.github.phenomics.ontolib.ontology.data.Ontology}.
 *
 * <p>All methods are static and read the ontology's directed graph. As used throughout this
 * class, an edge runs from a child term (its source) to a parent term (its destination), so the
 * in-edges of a term lead to its children and the out-edges lead to its parents.</p>
 *
 * @see HpoFrequency
 * @see HpoFrequencyTermIds
 * @see HpoModeOfInheritanceTermIds
 * @see HpoSubOntologyRootTermIds
 *
 * @author <a href="mailto:[email protected]">Peter Robinson</a>
 */
public final class OntologyAlgorithm {

  /** Static utility class; not meant to be instantiated. */
  private OntologyAlgorithm() {
    throw new AssertionError("OntologyAlgorithm is a static utility class");
  }

  /**
   * Return the graph of {@code ontology} with its type arguments restored.
   *
   * <p>The public methods of this class accept a raw {@code Ontology}, so the graph comes back
   * raw as well. The single unchecked conversion is kept here instead of being repeated (as a
   * raw iterator plus a cast per element) at every call site.</p>
   *
   * @param ontology The ontology whose graph is needed
   * @return The directed graph underlying {@code ontology}
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private static DirectedGraph<TermId, ImmutableEdge<TermId>> typedGraph(Ontology ontology) {
    return ontology.getGraph();
  }

  /**
   * Check whether {@code destID} is reached by a forward breadth-first search that starts at
   * {@code sourceID}, i.e. whether a directed path leads from the source to the destination.
   *
   * @param ontology The ontology to which both terms belong
   * @param sourceID The term at which the search starts
   * @param destID The term we are trying to reach
   * @return {@code true} if the search visits {@code destID}; always {@code false} if both
   *         terms are equal
   */
  public static boolean existsPath(Ontology ontology, final TermId sourceID, TermId destID) {
    // special case -- a term cannot have a path to itself in an ontology (DAG)
    if (sourceID.equals(destID)) {
      return false;
    }
    final DirectedGraph<TermId, ImmutableEdge<TermId>> graph = typedGraph(ontology);
    // Only membership is queried afterwards, so a hash set avoids the linear List#contains scan.
    final Set<TermId> visited = new HashSet<>();
    BreadthFirstSearch<TermId, ImmutableEdge<TermId>> bfs = new BreadthFirstSearch<>();
    bfs.startFromForward(graph, sourceID, (g, termId) -> {
      visited.add(termId);
      // NOTE(review): true presumably tells the search to keep going -- confirm against
      // BreadthFirstSearch before using the return value for an early exit.
      return true;
    });
    return visited.contains(destID);
  }

  /**
   * Find all of the direct children of parentTermId and include parentTermId itself in the
   * returned set.
   *
   * @param ontology The ontology to which parentTermId belongs
   * @param parentTermId The term whose children we are seeking
   * @return A set of all child terms of parentTermId (including parentTermId itself)
   */
  public static Set<TermId> getChildTerms(Ontology ontology, TermId parentTermId) {
    return getChildTerms(ontology, parentTermId, true);
  }

  /**
   * Find all of the direct children of parentTermId (do not include "grandchildren" and other
   * descendants).
   *
   * @param ontology The ontology to which parentTermId belongs
   * @param parentTermId The term whose children we are seeking
   * @param includeOriginalTerm true if we should include the term itself in the set of returned
   *        child terms
   * @return An immutable set of all child terms of parentTermId
   */
  public static Set<TermId> getChildTerms(Ontology ontology, TermId parentTermId,
      boolean includeOriginalTerm) {
    ImmutableSet.Builder<TermId> children = ImmutableSet.builder();
    if (includeOriginalTerm) {
      children.add(parentTermId);
    }
    // Every edge arriving at the parent starts at one of its children.
    Iterator<? extends Edge<TermId>> inEdges = typedGraph(ontology).inEdgeIterator(parentTermId);
    while (inEdges.hasNext()) {
      children.add(inEdges.next().getSource());
    }
    return children.build();
  }

  /**
   * Finds the direct child terms of a set of parent terms. Because this delegates to
   * {@link #getChildTerms(Ontology, TermId)}, the parent terms themselves are part of the result.
   *
   * @param ontology The ontology to which the set of parentTermIds belong
   * @param parentTermIdSet The terms whose children we are seeking
   * @return An immutable set with the children of all terms in parentTermIdSet, plus those
   *         terms themselves
   */
  public static Set<TermId> getChildTerms(Ontology ontology, Set<TermId> parentTermIdSet) {
    ImmutableSet.Builder<TermId> children = ImmutableSet.builder();
    for (TermId parentTermId : parentTermIdSet) {
      children.addAll(getChildTerms(ontology, parentTermId));
    }
    return children.build();
  }

  /**
   * Finds the direct parent terms of a set of child terms. Because this delegates to
   * {@link #getParentTerms(Ontology, TermId)}, the child terms themselves are part of the result.
   *
   * @param ontology The ontology to which the set of childTermIds belong
   * @param childTermIdSet The terms whose parents we are seeking
   * @return An immutable set with the parents of all terms in childTermIdSet, plus those terms
   *         themselves
   */
  public static Set<TermId> getParentTerms(Ontology ontology, Set<TermId> childTermIdSet) {
    ImmutableSet.Builder<TermId> parents = ImmutableSet.builder();
    for (TermId childTermId : childTermIdSet) {
      parents.addAll(getParentTerms(ontology, childTermId));
    }
    return parents.build();
  }

  /**
   * Find all of the descendants of parentTermId (including direct children and more distant
   * descendants).
   *
   * <p>The traversal is an iterative depth-first search. Each term is expanded only once, even
   * if it can be reached from parentTermId along several paths; without that check a term would
   * be re-expanded once per path leading to it.</p>
   *
   * @param ontology The ontology to which parentTermId belongs
   * @param parentTermId The term whose descendants we are seeking
   * @return An immutable set of all descendants of parentTermId (including parentTermId itself)
   */
  public static Set<TermId> getDescendents(Ontology ontology, TermId parentTermId) {
    // Serves both as the result and as the "already expanded" marker; keeps discovery order.
    Set<TermId> descendants = new LinkedHashSet<>();
    Deque<TermId> pending = new ArrayDeque<>();
    pending.push(parentTermId);
    while (!pending.isEmpty()) {
      TermId current = pending.pop();
      if (!descendants.add(current)) {
        // Already expanded via another path; its whole subtree is in the result by now.
        continue;
      }
      for (TermId child : getChildTerms(ontology, current, false)) {
        pending.push(child);
      }
    }
    return ImmutableSet.copyOf(descendants);
  }

  /**
   * Find all of the direct parents of childTermId (do not include "grandparents" and other
   * ancestors).
   *
   * @param ontology The ontology to which childTermId belongs
   * @param childTermId The term whose parents we are seeking
   * @param includeOriginalTerm true if we should include the term itself in the set of returned
   *        parent terms
   * @return An immutable set of all parent terms of childTermId
   */
  public static Set<TermId> getParentTerms(Ontology ontology, TermId childTermId,
      boolean includeOriginalTerm) {
    ImmutableSet.Builder<TermId> parents = ImmutableSet.builder();
    if (includeOriginalTerm) {
      parents.add(childTermId);
    }
    // Every edge leaving the child ends at one of its parents.
    Iterator<? extends Edge<TermId>> outEdges = typedGraph(ontology).outEdgeIterator(childTermId);
    while (outEdges.hasNext()) {
      parents.add(outEdges.next().getDest());
    }
    return parents.build();
  }

  /**
   * Find all of the direct parents of childTermId (do not include "grandparents" and other
   * ancestors) and include childTermId itself in the returned set.
   *
   * @param ontology The ontology to which childTermId belongs
   * @param childTermId The term whose parents we are seeking
   * @return A set of all parent terms of childTermId including childTermId itself
   */
  public static Set<TermId> getParentTerms(Ontology ontology, TermId childTermId) {
    return getParentTerms(ontology, childTermId, true);
  }

  /**
   * Find all the ancestor terms of childTermId, including parents and so on up to the root.
   * This is a wrapper around the function {@link Ontology#getAncestorTermIds(TermId)}
   * for convenience - it is the counterpart of {@link #getDescendents(Ontology, TermId)}.
   *
   * @param ontology The ontology to which childTermId belongs
   * @param childTermId The term whose ancestors we are seeking
   * @return The set of ancestors of childTermId, exactly as returned by
   *         {@link Ontology#getAncestorTermIds(TermId)}
   */
  public static Set<TermId> getAncestorTerms(Ontology ontology, TermId childTermId) {
    return ontology.getAncestorTermIds(childTermId);
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
package com.github.phenomics.ontolib.ontology.algo;

import com.github.phenomics.ontolib.graph.data.ImmutableDirectedGraph;
import com.github.phenomics.ontolib.graph.data.ImmutableEdge;
import com.github.phenomics.ontolib.ontology.data.*;
import com.google.common.collect.*;
import org.junit.Before;
import org.junit.Test;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

import static com.github.phenomics.ontolib.ontology.algo.OntologyAlgorithm.*;
import static org.junit.Assert.*;

/**
 * Unit tests for {@link OntologyAlgorithm}.
 *
 * <p>The fixture is a small ontology with five terms and six child-to-parent edges:
 * id1-&gt;id2, id1-&gt;id3, id1-&gt;id4, id2-&gt;id5, id3-&gt;id5, id4-&gt;id5.
 * Thus id5 is the root, id2/id3/id4 are its direct children, and id1 is a child of each of
 * id2, id3 and id4 and has no children of its own.</p>
 */
public class OntologyAlgorithmTest {

  private ImmutableSortedMap<String, String> metaInfo;
  private ImmutableList<TermId> vertices;
  private ImmutableList<ImmutableEdge<TermId>> edges;
  private ImmutableDirectedGraph<TermId, ImmutableEdge<TermId>> graph;

  private TermId rootTermId;
  private ImmutableMap<TermId, TestTerm> termMap;
  private ImmutableMap<TermId, TestTerm> obsoleteTermMap;
  private ImmutableMap<Integer, TestTermRelation> relationMap;
  private ImmutableOntology<TestTerm, TestTermRelation> ontology;

  private ImmutableTermId id1;
  private ImmutableTermId id2;
  private ImmutableTermId id3;
  private ImmutableTermId id4;
  private ImmutableTermId id5;

  /** Build a non-obsolete test term that carries only an id, a name and a definition. */
  private static TestTerm makeTerm(ImmutableTermId id, String name, String definition) {
    return new TestTerm(id, new ArrayList<>(), name, definition, null,
        new ArrayList<>(), new ArrayList<>(), false, null, null, new ArrayList<>());
  }

  /** Assemble the five-term example ontology before every test. */
  @Before
  public void setUp() {
    metaInfo = ImmutableSortedMap.of();

    id1 = ImmutableTermId.constructWithPrefix("HP:0000001");
    id2 = ImmutableTermId.constructWithPrefix("HP:0000002");
    id3 = ImmutableTermId.constructWithPrefix("HP:0000003");
    id4 = ImmutableTermId.constructWithPrefix("HP:0000004");
    id5 = ImmutableTermId.constructWithPrefix("HP:0000005");
    vertices = ImmutableList.of(id1, id2, id3, id4, id5);

    // Edges point from child (source) to parent (destination).
    edges = ImmutableList.of(
        ImmutableEdge.construct(id1, id2, 1),
        ImmutableEdge.construct(id1, id3, 2),
        ImmutableEdge.construct(id1, id4, 3),
        ImmutableEdge.construct(id2, id5, 4),
        ImmutableEdge.construct(id3, id5, 5),
        ImmutableEdge.construct(id4, id5, 6));
    graph = ImmutableDirectedGraph.construct(edges);

    rootTermId = id5;

    termMap = ImmutableMap.<TermId, TestTerm>builder()
        .put(id1, makeTerm(id1, "term1", "some definition 1"))
        .put(id2, makeTerm(id2, "term2", "some definition 2"))
        .put(id3, makeTerm(id3, "term3", "some definition 3"))
        .put(id4, makeTerm(id4, "term4", "some definition 4"))
        .put(id5, makeTerm(id5, "term5", "some definition 5"))
        .build();

    obsoleteTermMap = ImmutableMap.of();

    // One relation per edge, keyed by the edge id used above.
    relationMap = ImmutableMap.<Integer, TestTermRelation>builder()
        .put(1, new TestTermRelation(id1, id2, 1))
        .put(2, new TestTermRelation(id1, id3, 2))
        .put(3, new TestTermRelation(id1, id4, 3))
        .put(4, new TestTermRelation(id2, id5, 4))
        .put(5, new TestTermRelation(id3, id5, 5))
        .put(6, new TestTermRelation(id4, id5, 6))
        .build();

    ontology = new ImmutableOntology<>(metaInfo, graph, rootTermId,
        termMap.keySet(), obsoleteTermMap.keySet(), termMap, relationMap);
  }

  /** A path exists along the edge direction (child to ancestor) but never against it. */
  @Test
  public void testPathExists() {
    // id1 reaches each of its direct parents, but not the other way round
    assertTrue(existsPath(ontology, id1, id2));
    assertFalse(existsPath(ontology, id2, id1));
    assertTrue(existsPath(ontology, id1, id3));
    assertFalse(existsPath(ontology, id3, id1));
    assertTrue(existsPath(ontology, id1, id4));
    assertFalse(existsPath(ontology, id4, id1));
    // id1 reaches the root over two edges
    assertTrue(existsPath(ontology, id1, id5));
    assertFalse(existsPath(ontology, id5, id1));
    // direct children of the root reach the root
    assertTrue(existsPath(ontology, id2, id5));
    assertFalse(existsPath(ontology, id5, id2));
    assertTrue(existsPath(ontology, id4, id5));
    assertFalse(existsPath(ontology, id5, id4));
    // test that a term cannot have a path to itself.
    assertFalse(existsPath(ontology, id5, id5));
  }

  /** Test the default function, which includes the term itself in the set of returned terms. */
  @Test
  public void testGetTermChildrenId4andId1() {
    // id4 has only one child term, id1 (edge id1->id4); the query term id4 is returned as well
    Set<TermId> childrenOfId4 = ImmutableSet.of(id4, id1);
    assertEquals(childrenOfId4, getChildTerms(ontology, id4));
    // id1 has no children, so only the query term itself comes back
    Set<TermId> childrenOfId1 = ImmutableSet.of(id1);
    assertEquals(childrenOfId1, getChildTerms(ontology, id1));
  }

  /** Test the default function, which includes the term itself in the set of returned terms. */
  @Test
  public void testGetTermChildrenId5() {
    // id5 has 3 children: id2->id5, id3->id5, id4->id5
    Set<TermId> expected = ImmutableSet.of(id2, id3, id4, id5);
    assertEquals(expected, getChildTerms(ontology, id5));
  }

  /** The set variant returns the children of every query term plus the query terms. */
  @Test
  public void testGetChildrenOfSet() {
    // the only child of both id2 and id3 is id1
    Set<TermId> queryTerms = ImmutableSet.of(id2, id3);
    Set<TermId> expected = ImmutableSet.of(id1, id2, id3);
    assertEquals(expected, getChildTerms(ontology, queryTerms));
  }

  /** Uses the variant of getChildTerms that does not return the query (parent) term. */
  @Test
  public void testReturnChildrenWithoutOriginalTerm() {
    // id5 has 3 children: id2->id5, id3->id5, id4->id5
    Set<TermId> expected = ImmutableSet.of(id2, id3, id4);
    assertEquals(expected, getChildTerms(ontology, id5, false));
  }

  /**
   * getDescendents returns not only children but all descendants.
   * id1 is a child of id3, which is a child of id5, so id1 is a descendant but not a child
   * of id5.
   */
  @Test
  public void testGetDescendents() {
    Set<TermId> expected = ImmutableSet.of(id1, id2, id3, id4, id5);
    assertEquals(expected, getDescendents(ontology, id5));
  }

  /** Direct parents of id2, with the query term included by default. */
  @Test
  public void testGetParentsId2() {
    // the only parent of id2 is id5: id2->id5
    Set<TermId> parentsOfId2 = ImmutableSet.of(id2, id5);
    assertEquals(parentsOfId2, getParentTerms(ontology, id2));
    // id2 is not a parent of id5
    assertNotEquals(parentsOfId2, getParentTerms(ontology, id5));
    // instead, the root id5 has no parent and only the query term itself is returned
    Set<TermId> parentsOfId5 = ImmutableSet.of(id5);
    assertEquals(parentsOfId5, getParentTerms(ontology, id5));
  }

  /** Direct parents of id1, with the query term included by default. */
  @Test
  public void testGetParentsId1() {
    // id1 has three direct parents: id1->id2, id1->id3, id1->id4.
    // id5 is an ancestor of id1 (via id2, id3 and id4) but not a direct parent,
    // so it must not be returned here.
    Set<TermId> expected = ImmutableSet.of(id1, id2, id3, id4);
    assertEquals(expected, getParentTerms(ontology, id1));
  }

  /** The set variant returns the parents of every query term plus the query terms. */
  @Test
  public void testGetParentsOfSet() {
    // id3->id5, id4->id5
    Set<TermId> queryTerms = ImmutableSet.of(id3, id4);
    Set<TermId> expected = ImmutableSet.of(id3, id4, id5);
    assertEquals(expected, getParentTerms(ontology, queryTerms));
  }

  /** getAncestorTerms is expected to return every ancestor together with the query term. */
  @Test
  public void testGetAncestorsId1() {
    // id1 has id2, id3, id4 and id5 as ancestors
    Set<TermId> expected = ImmutableSet.of(id1, id2, id3, id4, id5);
    assertEquals(expected, getAncestorTerms(ontology, id1));
  }

  /** The root has no parent, so the default variant returns just the root itself. */
  @Test
  public void testRootHasNoParent() {
    // id5 is the root of our graph and does not have a parent term other than itself
    Set<TermId> expected = ImmutableSet.of(id5);
    assertEquals(expected, getParentTerms(ontology, id5));
  }

  /** Uses the variant of getParentTerms that does not return the query (child) term. */
  @Test
  public void testReturnParentWithoutOriginalTerm() {
    // id1 has three direct parents: id1->id2, id1->id3, id1->id4.
    // id5 is an ancestor of id1 but not a direct parent.
    Set<TermId> parentsOfId1 = ImmutableSet.of(id2, id3, id4);
    assertEquals(parentsOfId1, getParentTerms(ontology, id1, false));
    // The root has no parent, we expect the empty set
    Set<TermId> parentsOfRoot = new HashSet<>();
    assertEquals(parentsOfRoot, getParentTerms(ontology, id5, false));
  }

}