Skip to content

Commit

Permalink
Merge pull request #2527 from opencb/TASK-4682
Browse files — browse the repository at this point in the history
TASK-4682 - Error in VariantExport VCF : Duplicate allele (pre-step to exomiser)
  • Loading branch information
j-coll authored Nov 25, 2024
2 parents febe81b + f23b350 commit fa5d405
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,14 @@ private List<ClinicalVariant> getPrimaryFindings() throws IOException, StorageEn
fields[18]);
try {
Variant normalized = normalizer.normalize(Collections.singletonList(variant), false).get(0);
String variantId = normalized.toStringSimple();
normalizedToTsv.put(variantId, variant.toStringSimple());
String variantId = normalized.toString();
normalizedToTsv.put(variantId, variant.toString());
if (!variantTsvMap.containsKey(variantId)) {
variantTsvMap.put(variantId, new ArrayList<>());
}
variantTsvMap.get(variantId).add(fields);
} catch (NonStandardCompliantSampleField e) {
logger.warn("Skipping variant {}, it could not be normalized", variant.toStringSimple());
logger.warn("Skipping variant {}, it could not be normalized", variant.toString());
}

// Next line
Expand All @@ -275,8 +275,8 @@ private List<ClinicalVariant> getPrimaryFindings() throws IOException, StorageEn
for (Variant variant : variantResults.getResults()) {
ClinicalVariant clinicalVariant = clinicalVariantCreator.create(variant);
List<ExomiserTranscriptAnnotation> exomiserTranscripts = new ArrayList<>(variantTranscriptMap.get(normalizedToTsv
.get(variant.toStringSimple())));
for (String[] fields : variantTsvMap.get(variant.toStringSimple())) {
.get(variant.toString())));
for (String[] fields : variantTsvMap.get(variant.toString())) {
ClinicalProperty.ModeOfInheritance moi = getModeOfInheritance(fields[4]);
Map<String, Object> attributes = getAttributesFromTsv(fields);

Expand All @@ -301,8 +301,13 @@ private Map<String, Set<ExomiserTranscriptAnnotation>> getExomiserTranscriptAnno
if (geneScore.containsKey("contributingVariants")) {
List<Map<String, Object>> contributingVariants = (ArrayList) geneScore.get("contributingVariants");
for (Map<String, Object> contributingVariant : contributingVariants) {
String variantId = contributingVariant.get("contigName") + ":" + contributingVariant.get("start") + ":"
+ contributingVariant.get("ref") + ":" + contributingVariant.get("alt");
String variantId = new Variant(
contributingVariant.get("contigName").toString(),
((Number) contributingVariant.get("start")).intValue(),
((Number) contributingVariant.get("end")).intValue(),
contributingVariant.get("ref").toString(),
contributingVariant.get("alt").toString())
.toString();
if (!results.containsKey(variantId)) {
results.put(variantId, new HashSet<>());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
import org.opencb.biodata.models.variant.metadata.VariantFileHeader;
import org.opencb.biodata.models.variant.metadata.VariantMetadata;
import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata;
import org.opencb.biodata.models.variant.protobuf.VariantProto;
import org.opencb.opencga.core.response.RestResponse;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.io.VcfDataWriter;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;

import java.io.PrintStream;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/**
Expand All @@ -39,66 +37,47 @@ public void print(RestResponse queryResponse) {
if (checkErrors(queryResponse)) {
return;
}
print(new VariantQueryResult<Variant>(queryResponse.first()), null);
print(new VariantQueryResult<Variant>(queryResponse.first()));
}

public void print(VariantQueryResult<Variant> variantQueryResult) {
print(variantQueryResult, null);
}

public void print(Iterator<VariantProto.Variant> variantIterator) {
print(null, variantIterator);
}

private void print(VariantQueryResult<Variant> variantQueryResult, Iterator<VariantProto.Variant> variantIterator) {
if (variantQueryResult != null) {
if (metadata.getStudies().isEmpty()) {
// If excluding studies, we need to create a dummy study.
metadata.getStudies().add(VariantStudyMetadata
.newBuilder()
.setId("any")
.setSampleSetType(SampleSetType.UNKNOWN)
.setAggregatedHeader(VariantFileHeader
.newBuilder()
.setVersion("")
.build())
.build());
}
String study = metadata.getStudies().get(0).getId();
VcfDataWriter<Variant> writer = VcfDataWriter.newWriterForAvro(metadata, annotations, outputStream);
if (variantQueryResult.getSamples() != null) {
writer.setSamples(variantQueryResult.getSamples().get(study));
if (metadata.getStudies().isEmpty()) {
// If excluding studies, we need to create a dummy study.
metadata.getStudies().add(VariantStudyMetadata
.newBuilder()
.setId("any")
.setSampleSetType(SampleSetType.UNKNOWN)
.setAggregatedHeader(VariantFileHeader
.newBuilder()
.setVersion("")
.build())
.build());
}
String study = metadata.getStudies().get(0).getId();
VcfDataWriter<Variant> writer = VcfDataWriter.newWriterForAvro(metadata, annotations, outputStream);
if (variantQueryResult.getSamples() != null) {
writer.setSamples(variantQueryResult.getSamples().get(study));
}
writer.open();
writer.pre();
for (Variant variant : variantQueryResult.getResults()) {
// FIXME: The server may be returning the StudyEntry with a different name
String shortStudy = study.substring(study.lastIndexOf(':') + 1, study.length());
if (variant.getStudy(study) == null && variant.getStudy(shortStudy) != null) {
variant.addStudyEntry(variant.getStudy(shortStudy).setStudyId(study));
}
writer.open();
writer.pre();
for (Variant variant : variantQueryResult.getResults()) {
// FIXME: The server may be returning the StudyEntry with a different name
String shortStudy = study.substring(study.lastIndexOf(':') + 1, study.length());
if (variant.getStudy(study) == null && variant.getStudy(shortStudy) != null) {
variant.addStudyEntry(variant.getStudy(shortStudy).setStudyId(study));
}

// FIXME: This should not be needed! VariantAvroToVariantContextConverter must be fixed
if (variant.getStudy(study) == null) {
StudyEntry studyEntry = new StudyEntry(study);
studyEntry.getFiles().add(new FileEntry("", null, Collections.emptyMap()));
variant.addStudyEntry(studyEntry);
}
writer.write(variant);
// FIXME: This should not be needed! VariantAvroToVariantContextConverter must be fixed
if (variant.getStudy(study) == null) {
StudyEntry studyEntry = new StudyEntry(study);
studyEntry.getFiles().add(new FileEntry("", null, Collections.emptyMap()));
variant.addStudyEntry(studyEntry);
}
writer.post();
writer.close();
} else {
VcfDataWriter<VariantProto.Variant> writer = VcfDataWriter.newWriterForProto(metadata, annotations, outputStream);
writer.open();
writer.pre();
while (variantIterator.hasNext()) {
VariantProto.Variant next = variantIterator.next();
writer.write(next);
}
writer.post();
writer.close();
writer.write(variant);
}
writer.post();
writer.close();

outputStream.close();

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
import org.opencb.biodata.models.metadata.Sample;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.metadata.VariantMetadata;
import org.opencb.biodata.models.variant.protobuf.VariantProto;
import org.opencb.biodata.tools.variant.converters.VariantContextConverter;
import org.opencb.biodata.tools.variant.converters.avro.VariantAvroToVariantContextConverter;
import org.opencb.biodata.tools.variant.converters.avro.VariantStudyMetadataToVCFHeaderConverter;
import org.opencb.biodata.tools.variant.converters.proto.VariantProtoToVariantContextConverter;
import org.opencb.commons.io.DataWriter;

import java.io.OutputStream;
Expand Down Expand Up @@ -45,11 +43,6 @@ public static VcfDataWriter<Variant> newWriterForAvro(VariantMetadata metadata,
return new VariantVcfDataWriter(metadata, annotations, outputStream);
}

public static VcfDataWriter<VariantProto.Variant> newWriterForProto(VariantMetadata metadata, List<String> annotations,
OutputStream outputStream) {
return new VariantProtoVcfDataWriter(metadata, annotations, outputStream);
}

private static class VariantVcfDataWriter extends VcfDataWriter<Variant> {

VariantVcfDataWriter(VariantMetadata metadata, List<String> annotations, OutputStream outputStream) {
Expand All @@ -62,18 +55,6 @@ public VariantContextConverter<Variant> newConverter(String study, List<String>
}
}

private static class VariantProtoVcfDataWriter extends VcfDataWriter<VariantProto.Variant> {

VariantProtoVcfDataWriter(VariantMetadata metadata, List<String> annotations, OutputStream outputStream) {
super(metadata, annotations, outputStream);
}

@Override
public VariantContextConverter<VariantProto.Variant> newConverter(String study, List<String> samples, List<String> annotations) {
return new VariantProtoToVariantContextConverter(study, samples, annotations);
}
}

public VcfDataWriter<T> setSamples(List<String> samples) {
this.samples = samples;
return this;
Expand Down

0 comments on commit fa5d405

Please sign in to comment.