Skip to content

Commit

Permalink
lib: update the VariantAnnotationCalculator to support multi-species,…
Browse files Browse the repository at this point in the history
… #TASK-6426, #TASK-5564
  • Loading branch information
jtarraga committed Aug 2, 2024
1 parent 36c3609 commit efa4824
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ public LoadCommandExecutor(AdminCliOptionsParser.LoadCommandOptions loadCommandO
} else {
loadOptions = loadCommandOptions.data.split(",");
}


if (loadCommandOptions.field != null) {
field = loadCommandOptions.field;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public void execute() {
DataRelease dataRelease = dataReleaseManager.get(validationCommandOptions.dataRelease);
variantAnnotationCalculator = new VariantAnnotationCalculator(validationCommandOptions.species,
validationCommandOptions.assembly, dataRelease, validationCommandOptions.apiKey,
cellBaseManagerFactory);
cellBaseManagerFactory, configuration);
} catch (CellBaseException e) {
e.printStackTrace();
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ private boolean runAnnotation() throws Exception {
DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(species, assembly);
DataRelease dataRelease = dataReleaseManager.get(variantAnnotationCommandOptions.dataRelease);
VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
dataRelease, variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory);
dataRelease, variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory, configuration);
List<CellBaseDataResult<VariantAnnotation>> annotationByVariantList =
variantAnnotationCalculator.getAnnotationByVariantList(variants, serverQueryOptions);

Expand Down Expand Up @@ -485,7 +485,7 @@ private VariantAnnotator createCellBaseAnnotator() throws CellBaseException {
DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(species, assembly);
DataRelease dataRelease = dataReleaseManager.get(variantAnnotationCommandOptions.dataRelease);
return new CellBaseLocalVariantAnnotator(new VariantAnnotationCalculator(species, assembly, dataRelease,
variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory), serverQueryOptions);
variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory, configuration), serverQueryOptions);
} else {
try {
ClientConfiguration clientConfiguration = ClientConfiguration.load(getClass()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.cellbase.core.ParamConstants;
import org.opencb.cellbase.core.api.VariantQuery;
import org.opencb.cellbase.core.api.key.ApiKeyLicensedDataUtils;
import org.opencb.cellbase.core.api.query.CellBaseQueryOptions;
import org.opencb.cellbase.core.api.query.QueryException;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
Expand All @@ -38,7 +39,6 @@
import org.opencb.cellbase.lib.impl.core.CellBaseCoreDBAdaptor;
import org.opencb.cellbase.lib.impl.core.SpliceScoreMongoDBAdaptor;
import org.opencb.cellbase.lib.impl.core.VariantMongoDBAdaptor;
import org.opencb.cellbase.core.api.key.ApiKeyLicensedDataUtils;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.cellbase.lib.variant.annotation.CellBaseNormalizerSequenceAdaptor;
import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator;
Expand Down Expand Up @@ -96,7 +96,7 @@ public List<CellBaseDataResult<String>> getHgvsByVariant(String variants, DataRe
HgvsCalculator hgvsCalculator = new HgvsCalculator(genomeManager, dataRelease.getRelease());
List<CellBaseDataResult<String>> results = new ArrayList<>();
VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
dataRelease, "", cellbaseManagerFactory);
dataRelease, "", cellbaseManagerFactory, configuration);
List<Gene> batchGeneList = variantAnnotationCalculator.getBatchGeneList(variantList);
for (Variant variant : variantList) {
List<Gene> variantGeneList = variantAnnotationCalculator.getAffectedGenes(batchGeneList, variant);
Expand All @@ -120,7 +120,7 @@ public CellBaseDataResult<Variant> getNormalizationByVariant(String variants, bo
DataRelease dataRelease) throws CellBaseException {
List<Variant> variantList = parseVariants(variants);
VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
dataRelease, "", cellbaseManagerFactory);
dataRelease, "", cellbaseManagerFactory, configuration);


// Set decompose MNV behaviour
Expand Down Expand Up @@ -195,7 +195,7 @@ public List<CellBaseDataResult<VariantAnnotation>> getAnnotationByVariant(QueryO
}

VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly,
dataRelease, apiKey, cellbaseManagerFactory);
dataRelease, apiKey, cellbaseManagerFactory, configuration);
List<CellBaseDataResult<VariantAnnotation>> queryResults = variantAnnotationCalculator.getAnnotationByVariantList(variantList,
queryOptions);
return queryResults;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,16 @@
import org.opencb.cellbase.core.api.RepeatsQuery;
import org.opencb.cellbase.core.api.query.LogicalList;
import org.opencb.cellbase.core.api.query.QueryException;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.models.DataRelease;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.core.utils.SpeciesUtils;
import org.opencb.cellbase.lib.EtlCommons;
import org.opencb.cellbase.lib.managers.*;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.cellbase.lib.variant.annotation.futures.FuturePharmacogenomicsAnnotator;
import org.opencb.cellbase.lib.variant.annotation.futures.FutureSpliceScoreAnnotator;
import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator;
import org.opencb.commons.datastore.core.QueryOptions;
import org.slf4j.Logger;
Expand All @@ -53,6 +56,7 @@

import static org.opencb.cellbase.core.ParamConstants.API_KEY_PARAM;
import static org.opencb.cellbase.core.variant.PhasedQueryManager.*;
import static org.opencb.cellbase.lib.EtlCommons.*;

/**
* Created by imedina on 06/02/16.
Expand All @@ -74,11 +78,16 @@ public class VariantAnnotationCalculator {
private RepeatsManager repeatsManager;
private ProteinManager proteinManager;
private PharmacogenomicsManager pharmacogenomicsManager;

private DataRelease dataRelease;
private String apiKey;
private Set<String> annotatorSet;
private List<String> includeGeneFields;

private String species;
private String assembly;
private CellBaseConfiguration configuration;

private final VariantNormalizer normalizer;
private boolean normalize = false;
private boolean decompose = false;
Expand All @@ -99,7 +108,14 @@ public class VariantAnnotationCalculator {
private static Logger logger = LoggerFactory.getLogger(VariantAnnotationCalculator.class);

public VariantAnnotationCalculator(String species, String assembly, DataRelease dataRelease, String apiKey,
CellBaseManagerFactory cellbaseManagerFactory) throws CellBaseException {
CellBaseManagerFactory cellbaseManagerFactory, CellBaseConfiguration configuration)
throws CellBaseException {
logger.debug("VariantAnnotationCalculator: in 'constructor'");

this.species = species;
this.assembly = assembly;
this.configuration = configuration;

this.genomeManager = cellbaseManagerFactory.getGenomeManager(species, assembly);
this.variantManager = cellbaseManagerFactory.getVariantManager(species, assembly);
this.geneManager = cellbaseManagerFactory.getGeneManager(species, assembly);
Expand All @@ -118,9 +134,9 @@ public VariantAnnotationCalculator(String species, String assembly, DataRelease
// at parseQueryParam
this.normalizer = new VariantNormalizer(getNormalizerConfig());

hgvsCalculator = new HgvsCalculator(genomeManager, this.dataRelease.getRelease());
this.hgvsCalculator = new HgvsCalculator(genomeManager, this.dataRelease.getRelease());


logger.debug("VariantAnnotationMongoDBAdaptor: in 'constructor'");
}

private VariantNormalizer.VariantNormalizerConfig getNormalizerConfig() {
Expand Down Expand Up @@ -467,15 +483,15 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar

FutureConservationAnnotator futureConservationAnnotator = null;
Future<List<CellBaseDataResult<Score>>> conservationFuture = null;
if (annotatorSet.contains("conservation")) {
if (SpeciesUtils.hasData(configuration, species, CONSERVATION_DATA) && annotatorSet.contains("conservation")) {
futureConservationAnnotator = new FutureConservationAnnotator(normalizedVariantList, QueryOptions.empty(),
dataRelease.getRelease());
conservationFuture = CACHED_THREAD_POOL.submit(futureConservationAnnotator);
}

FutureVariantFunctionalScoreAnnotator futureVariantFunctionalScoreAnnotator = null;
Future<List<CellBaseDataResult<Score>>> variantFunctionalScoreFuture = null;
if (annotatorSet.contains("functionalScore")) {
if (SpeciesUtils.hasData(configuration, species, VARIATION_FUNCTIONAL_SCORE_DATA) && annotatorSet.contains("functionalScore")) {
futureVariantFunctionalScoreAnnotator = new FutureVariantFunctionalScoreAnnotator(normalizedVariantList, QueryOptions.empty(),
dataRelease.getRelease());
variantFunctionalScoreFuture = CACHED_THREAD_POOL.submit(futureVariantFunctionalScoreAnnotator);
Expand All @@ -484,7 +500,8 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar
FutureClinicalAnnotator futureClinicalAnnotator = null;
Future<List<CellBaseDataResult<Variant>>> clinicalFuture = null;
// FIXME "clinical" is deprecated, replaced with traitAssociation
if (annotatorSet.contains("clinical") || annotatorSet.contains("traitAssociation")) {
if (SpeciesUtils.hasData(configuration, species, CLINICAL_VARIANT_DATA)
&& (annotatorSet.contains("clinical") || annotatorSet.contains("traitAssociation"))) {
QueryOptions queryOptions = new QueryOptions();
queryOptions.add(ParamConstants.QueryParams.PHASE.key(), phased);
queryOptions.add(ParamConstants.QueryParams.CHECK_AMINO_ACID_CHANGE.key(), checkAminoAcidChange);
Expand All @@ -495,7 +512,7 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar

FutureRepeatsAnnotator futureRepeatsAnnotator = null;
Future<List<CellBaseDataResult<Repeat>>> repeatsFuture = null;
if (annotatorSet.contains("repeats")) {
if (SpeciesUtils.hasData(configuration, species, REPEATS_DATA) && annotatorSet.contains("repeats")) {
futureRepeatsAnnotator = new FutureRepeatsAnnotator(normalizedVariantList, dataRelease.getRelease());
repeatsFuture = CACHED_THREAD_POOL.submit(futureRepeatsAnnotator);
}
Expand All @@ -509,15 +526,16 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar

FutureSpliceScoreAnnotator futureSpliceScoreAnnotator = null;
Future<List<CellBaseDataResult<SpliceScore>>> spliceScoreFuture = null;
if (annotatorSet.contains("consequenceType")) {
if (SpeciesUtils.hasData(configuration, species, SPLICE_SCORE_DATA) && annotatorSet.contains("consequenceType")) {
futureSpliceScoreAnnotator = new FutureSpliceScoreAnnotator(normalizedVariantList, QueryOptions.empty(),
dataRelease.getRelease());
dataRelease.getRelease(), apiKey, variantManager);
spliceScoreFuture = CACHED_THREAD_POOL.submit(futureSpliceScoreAnnotator);
}

FuturePharmacogenomicsAnnotator futurePharmacogenomicsAnnotator = null;
Future<List<CellBaseDataResult<PharmaChemical>>> pharmacogenomicsFuture = null;
if (annotatorSet.contains("pharmacogenomics") && dataRelease.getCollections().containsKey(EtlCommons.PHARMACOGENOMICS_DATA)) {
if (SpeciesUtils.hasData(configuration, species, PHARMACOGENOMICS_DATA) && annotatorSet.contains("pharmacogenomics")
&& dataRelease.getCollections().containsKey(EtlCommons.PHARMACOGENOMICS_DATA)) {
futurePharmacogenomicsAnnotator = new FuturePharmacogenomicsAnnotator(normalizedVariantList, QueryOptions.empty(),
dataRelease.getRelease(), pharmacogenomicsManager, logger);
pharmacogenomicsFuture = CACHED_THREAD_POOL.submit(futurePharmacogenomicsAnnotator);
Expand Down Expand Up @@ -1584,7 +1602,8 @@ public void processResults(Future<List<CellBaseDataResult<Variant>>> variationFu
}
}

if (annotatorSet.contains("populationFrequencies") && preferredVariant != null) {
if (annotatorSet.contains("populationFrequencies") && preferredVariant != null
&& preferredVariant.getAnnotation() != null) {
variantAnnotationList.get(i)
.setPopulationFrequencies(preferredVariant.getAnnotation().getPopulationFrequencies());
}
Expand Down Expand Up @@ -1909,74 +1928,6 @@ public void processResults(Future<List<CellBaseDataResult<Cytoband>>> cytobandFu
}
}

/**
 * Callable that queries splice scores for a list of variants and, once the query has finished,
 * copies the matching scores into the consequence types of the corresponding annotations.
 *
 * <p>{@link #call()} produces one {@code CellBaseDataResult} per input variant, in input order.
 * {@link #processResults} pairs result {@code i} with annotation {@code i}.
 *
 * <p>NOTE(review): {@code variantManager}, {@code apiKey} and {@code logger} are not declared in this
 * class; they are presumably members of the enclosing class - confirm before relocating this class.
 */
class FutureSpliceScoreAnnotator implements Callable<List<CellBaseDataResult<SpliceScore>>> {
    private final List<Variant> variantList;
    // Stored for parity with the sibling annotators; not read by any method of this class.
    private final QueryOptions queryOptions;
    private final int dataRelease;

    /**
     * @param variantList  variants to query splice scores for
     * @param queryOptions query options (kept but currently not used by the query)
     * @param dataRelease  data release number forwarded to the splice score query
     */
    FutureSpliceScoreAnnotator(List<Variant> variantList, QueryOptions queryOptions, int dataRelease) {
        this.variantList = variantList;
        this.queryOptions = queryOptions;
        this.dataRelease = dataRelease;
    }

    /**
     * Queries the splice scores of every variant in the list.
     *
     * @return one result per variant, in the same order as the input list
     * @throws Exception whatever the underlying splice score query throws
     */
    @Override
    public List<CellBaseDataResult<SpliceScore>> call() throws Exception {
        long startTime = System.currentTimeMillis();

        List<CellBaseDataResult<SpliceScore>> cellBaseDataResultList = new ArrayList<>(variantList.size());

        logger.debug("Query splice");
        // Want to return only one CellBaseDataResult object per Variant
        for (Variant variant : variantList) {
            cellBaseDataResultList.add(variantManager.getSpliceScoreVariant(variant, apiKey, dataRelease));
        }
        logger.debug("Splice score query performance is {}ms for {} variants", System.currentTimeMillis() - startTime,
                variantList.size());
        return cellBaseDataResultList;
    }

    /**
     * Waits up to 30 seconds for the splice score query and adds the scores to the consequence types
     * whose alternate allele and transcript match.
     *
     * @param spliceFuture          future returned when this callable was submitted
     * @param variantAnnotationList annotations to update; element {@code i} is paired with result {@code i}
     * @throws InterruptedException if the waiting thread is interrupted
     * @throws ExecutionException   if the query failed or did not finish within the timeout
     */
    public void processResults(Future<List<CellBaseDataResult<SpliceScore>>> spliceFuture,
                               List<VariantAnnotation> variantAnnotationList)
            throws InterruptedException, ExecutionException {
        List<CellBaseDataResult<SpliceScore>> spliceCellBaseDataResults;
        try {
            spliceCellBaseDataResults = spliceFuture.get(30, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            spliceFuture.cancel(true);
            throw new ExecutionException("Unable to finish splice score query on time", e);
        }

        if (CollectionUtils.isEmpty(spliceCellBaseDataResults)) {
            return;
        }

        // Never index past the shorter of the two lists
        int numItems = Math.min(variantAnnotationList.size(), spliceCellBaseDataResults.size());
        for (int i = 0; i < numItems; i++) {
            CellBaseDataResult<SpliceScore> spliceScoreResult = spliceCellBaseDataResults.get(i);
            if (spliceScoreResult == null || CollectionUtils.isEmpty(spliceScoreResult.getResults())) {
                continue;
            }

            VariantAnnotation variantAnnotation = variantAnnotationList.get(i);
            List<ConsequenceType> consequenceTypes = variantAnnotation.getConsequenceTypes();
            if (CollectionUtils.isEmpty(consequenceTypes)) {
                // Nothing to attach the scores to
                continue;
            }

            // An empty alternate is matched against "-" in the splice score data; computed once per variant
            String alt = StringUtils.isEmpty(variantAnnotation.getAlternate())
                    ? "-"
                    : variantAnnotation.getAlternate();

            for (SpliceScore spliceScore : spliceScoreResult.getResults()) {
                if (spliceScore.getAlternates() == null) {
                    continue;
                }
                for (ConsequenceType ct : consequenceTypes) {
                    // The transcript check does not depend on the alternate, so do it before the inner loop
                    if (!isSameOrUnknownTranscript(spliceScore, ct)) {
                        continue;
                    }
                    for (SpliceScoreAlternate spliceScoreAlt : spliceScore.getAlternates()) {
                        if (alt.equals(spliceScoreAlt.getAltAllele())) {
                            SpliceScores scores = new SpliceScores(spliceScore.getSource(), spliceScoreAlt.getScores());
                            if (ct.getSpliceScores() == null) {
                                ct.setSpliceScores(new ArrayList<>());
                            }
                            ct.getSpliceScores().add(scores);
                        }
                    }
                }
            }
        }
    }

    /**
     * Returns true when either side has no transcript ID or both transcript IDs are equal.
     */
    private boolean isSameOrUnknownTranscript(SpliceScore spliceScore, ConsequenceType ct) {
        return StringUtils.isEmpty(spliceScore.getTranscriptId())
                || StringUtils.isEmpty(ct.getTranscriptId())
                || spliceScore.getTranscriptId().equals(ct.getTranscriptId());
    }
}

/**
 * Gives access to the variant normalizer held by this instance.
 *
 * @return the {@code normalizer} field
 */
public VariantNormalizer getNormalizer() {
    return this.normalizer;
}
Expand Down
Loading

0 comments on commit efa4824

Please sign in to comment.