From 3a708b6b909882687cffb8fdec266c89253ac4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 6 Nov 2024 12:02:20 +0000 Subject: [PATCH 1/2] storage: Remove usages of VariantProtoToVariantContextConverter. #TASK-4682 --- .../app/cli/main/io/VcfOutputWriter.java | 91 +++++++------------ .../core/variant/io/VcfDataWriter.java | 19 ---- 2 files changed, 35 insertions(+), 75 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java index 0c398dd2ef6..3d6bd1f4057 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java @@ -7,14 +7,12 @@ import org.opencb.biodata.models.variant.metadata.VariantFileHeader; import org.opencb.biodata.models.variant.metadata.VariantMetadata; import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata; -import org.opencb.biodata.models.variant.protobuf.VariantProto; import org.opencb.opencga.core.response.RestResponse; -import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.io.VcfDataWriter; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import java.io.PrintStream; import java.util.Collections; -import java.util.Iterator; import java.util.List; /** @@ -39,66 +37,47 @@ public void print(RestResponse queryResponse) { if (checkErrors(queryResponse)) { return; } - print(new VariantQueryResult(queryResponse.first()), null); + print(new VariantQueryResult(queryResponse.first())); } public void print(VariantQueryResult variantQueryResult) { - print(variantQueryResult, null); - } - - public void print(Iterator variantIterator) { - print(null, variantIterator); - } - - private void print(VariantQueryResult variantQueryResult, Iterator variantIterator) { - if (variantQueryResult != null) { - if (metadata.getStudies().isEmpty()) { - // If excluding studies, we need to create a dummy study. - metadata.getStudies().add(VariantStudyMetadata - .newBuilder() - .setId("any") - .setSampleSetType(SampleSetType.UNKNOWN) - .setAggregatedHeader(VariantFileHeader - .newBuilder() - .setVersion("") - .build()) - .build()); - } - String study = metadata.getStudies().get(0).getId(); - VcfDataWriter writer = VcfDataWriter.newWriterForAvro(metadata, annotations, outputStream); - if (variantQueryResult.getSamples() != null) { - writer.setSamples(variantQueryResult.getSamples().get(study)); + if (metadata.getStudies().isEmpty()) { + // If excluding studies, we need to create a dummy study. + metadata.getStudies().add(VariantStudyMetadata + .newBuilder() + .setId("any") + .setSampleSetType(SampleSetType.UNKNOWN) + .setAggregatedHeader(VariantFileHeader + .newBuilder() + .setVersion("") + .build()) + .build()); + } + String study = metadata.getStudies().get(0).getId(); + VcfDataWriter writer = VcfDataWriter.newWriterForAvro(metadata, annotations, outputStream); + if (variantQueryResult.getSamples() != null) { + writer.setSamples(variantQueryResult.getSamples().get(study)); + } + writer.open(); + writer.pre(); + for (Variant variant : variantQueryResult.getResults()) { + // FIXME: The server may be returning the StudyEntry with a different name + String shortStudy = study.substring(study.lastIndexOf(':') + 1, study.length()); + if (variant.getStudy(study) == null && variant.getStudy(shortStudy) != null) { + variant.addStudyEntry(variant.getStudy(shortStudy).setStudyId(study)); } - writer.open(); - writer.pre(); - for (Variant variant : variantQueryResult.getResults()) { - // FIXME: The server may be returning the StudyEntry with a different name - String shortStudy = study.substring(study.lastIndexOf(':') + 1, study.length()); - if (variant.getStudy(study) == null && variant.getStudy(shortStudy) != null) { - variant.addStudyEntry(variant.getStudy(shortStudy).setStudyId(study)); - } - // FIXME: This should not be needed! VariantAvroToVariantContextConverter must be fixed - if (variant.getStudy(study) == null) { - StudyEntry studyEntry = new StudyEntry(study); - studyEntry.getFiles().add(new FileEntry("", null, Collections.emptyMap())); - variant.addStudyEntry(studyEntry); - } - writer.write(variant); + // FIXME: This should not be needed! VariantAvroToVariantContextConverter must be fixed + if (variant.getStudy(study) == null) { + StudyEntry studyEntry = new StudyEntry(study); + studyEntry.getFiles().add(new FileEntry("", null, Collections.emptyMap())); + variant.addStudyEntry(studyEntry); } - writer.post(); - writer.close(); - } else { - VcfDataWriter writer = VcfDataWriter.newWriterForProto(metadata, annotations, outputStream); - writer.open(); - writer.pre(); - while (variantIterator.hasNext()) { - VariantProto.Variant next = variantIterator.next(); - writer.write(next); - } - writer.post(); - writer.close(); + writer.write(variant); } + writer.post(); + writer.close(); + outputStream.close(); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VcfDataWriter.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VcfDataWriter.java index 0d9533d51a6..71e3756a97f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VcfDataWriter.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/io/VcfDataWriter.java @@ -8,11 +8,9 @@ import org.opencb.biodata.models.metadata.Sample; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.metadata.VariantMetadata; -import org.opencb.biodata.models.variant.protobuf.VariantProto; import org.opencb.biodata.tools.variant.converters.VariantContextConverter; import org.opencb.biodata.tools.variant.converters.avro.VariantAvroToVariantContextConverter; import org.opencb.biodata.tools.variant.converters.avro.VariantStudyMetadataToVCFHeaderConverter; -import org.opencb.biodata.tools.variant.converters.proto.VariantProtoToVariantContextConverter; import org.opencb.commons.io.DataWriter; import java.io.OutputStream; @@ -45,11 +43,6 @@ public static VcfDataWriter newWriterForAvro(VariantMetadata metadata, return new VariantVcfDataWriter(metadata, annotations, outputStream); } - public static VcfDataWriter newWriterForProto(VariantMetadata metadata, List annotations, - OutputStream outputStream) { - return new VariantProtoVcfDataWriter(metadata, annotations, outputStream); - } - private static class VariantVcfDataWriter extends VcfDataWriter { VariantVcfDataWriter(VariantMetadata metadata, List annotations, OutputStream outputStream) { @@ -62,18 +55,6 @@ public VariantContextConverter newConverter(String study, List } } - private static class VariantProtoVcfDataWriter extends VcfDataWriter { - - VariantProtoVcfDataWriter(VariantMetadata metadata, List annotations, OutputStream outputStream) { - super(metadata, annotations, outputStream); - } - - @Override - public VariantContextConverter newConverter(String study, List samples, List annotations) { - return new VariantProtoToVariantContextConverter(study, samples, annotations); - } - } - public VcfDataWriter setSamples(List samples) { this.samples = samples; return this; From f23b35015cba3bdd06c5721777c9a61bf1673de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 6 Nov 2024 12:02:34 +0000 Subject: [PATCH 2/2] analysis: Fix NPE parsing exomiser results for symbolic variants. #TASK-4682 --- .../ExomiserInterpretationAnalysis.java | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java index 55b291f156e..694e811e8c7 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java @@ -241,14 +241,14 @@ private List getPrimaryFindings() throws IOException, StorageEn fields[18]); try { Variant normalized = normalizer.normalize(Collections.singletonList(variant), false).get(0); - String variantId = normalized.toStringSimple(); - normalizedToTsv.put(variantId, variant.toStringSimple()); + String variantId = normalized.toString(); + normalizedToTsv.put(variantId, variant.toString()); if (!variantTsvMap.containsKey(variantId)) { variantTsvMap.put(variantId, new ArrayList<>()); } variantTsvMap.get(variantId).add(fields); } catch (NonStandardCompliantSampleField e) { - logger.warn("Skipping variant {}, it could not be normalized", variant.toStringSimple()); + logger.warn("Skipping variant {}, it could not be normalized", variant.toString()); } // Next line @@ -275,8 +275,8 @@ private List getPrimaryFindings() throws IOException, StorageEn for (Variant variant : variantResults.getResults()) { ClinicalVariant clinicalVariant = clinicalVariantCreator.create(variant); List exomiserTranscripts = new ArrayList<>(variantTranscriptMap.get(normalizedToTsv - .get(variant.toStringSimple()))); - for (String[] fields : variantTsvMap.get(variant.toStringSimple())) { + .get(variant.toString()))); + for (String[] fields : variantTsvMap.get(variant.toString())) { ClinicalProperty.ModeOfInheritance moi = getModeOfInheritance(fields[4]); Map attributes = getAttributesFromTsv(fields); @@ -301,8 +301,13 @@ private Map> getExomiserTranscriptAnno if (geneScore.containsKey("contributingVariants")) { List> contributingVariants = (ArrayList) geneScore.get("contributingVariants"); for (Map contributingVariant : contributingVariants) { - String variantId = contributingVariant.get("contigName") + ":" + contributingVariant.get("start") + ":" - + contributingVariant.get("ref") + ":" + contributingVariant.get("alt"); + String variantId = new Variant( + contributingVariant.get("contigName").toString(), + ((Number) contributingVariant.get("start")).intValue(), + ((Number) contributingVariant.get("end")).intValue(), + contributingVariant.get("ref").toString(), + contributingVariant.get("alt").toString()) + .toString(); if (!results.containsKey(variantId)) { results.put(variantId, new HashSet<>()); }