From b7c825b84f64a976f665411130611f48d9871fdd Mon Sep 17 00:00:00 2001 From: Vasiliy Strelnikov Date: Wed, 28 Aug 2024 20:36:24 +0000 Subject: [PATCH] Add deepsomatic tool --- conf/modules/deepsomatic.config | 43 ++++++ modules.json | 5 + modules/nf-core/deepsomatic/main.nf | 71 ++++++++++ modules/nf-core/deepsomatic/meta.yml | 128 ++++++++++++++++++ .../nf-core/deepsomatic/tests/main.nf.test | 59 ++++++++ .../deepsomatic/tests/main.nf.test.snap | 20 +++ modules/nf-core/deepsomatic/tests/tags.yml | 2 + nextflow.config | 1 + nextflow_schema.json | 2 +- .../bam_variant_calling_deepsomatic/main.nf | 75 ++++++++++ .../bam_variant_calling_somatic_all/main.nf | 14 ++ 11 files changed, 419 insertions(+), 1 deletion(-) create mode 100644 conf/modules/deepsomatic.config create mode 100644 modules/nf-core/deepsomatic/main.nf create mode 100644 modules/nf-core/deepsomatic/meta.yml create mode 100644 modules/nf-core/deepsomatic/tests/main.nf.test create mode 100644 modules/nf-core/deepsomatic/tests/main.nf.test.snap create mode 100644 modules/nf-core/deepsomatic/tests/tags.yml create mode 100644 subworkflows/local/bam_variant_calling_deepsomatic/main.nf diff --git a/conf/modules/deepsomatic.config b/conf/modules/deepsomatic.config new file mode 100644 index 0000000000..564720530d --- /dev/null +++ b/conf/modules/deepsomatic.config @@ -0,0 +1,43 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// DEEPSOMATIC + +process { + + withName: 'DEEPSOMATIC' { + ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepsomatic" : "${meta.id}.deepsomatic.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('deepsomatic') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "deepsomatic/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_DEEPSOMATIC_.*' { + ext.prefix = { "${meta.id}.deepsomatic" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/deepsomatic/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_DEEPSOMATIC_GVCF' { + ext.prefix = { "${meta.id}.deepsomatic.g" } + } + +} diff --git a/modules.json b/modules.json index 26d801647b..1a8aa2b85f 100644 --- a/modules.json +++ b/modules.json @@ -122,6 +122,11 @@ "git_sha": "c3f338377c177a01847eeea2f77da33ce89f92e6", "installed_by": ["modules"] }, + "deepsomatic": { + "branch": "master", + "git_sha": "89e06f63c5ee7abe5d6910277aff8e83b00a8b50", + "installed_by": ["modules"] + }, "deepvariant": { "branch": "master", "git_sha": "199ba086a259e1933d6e0ab7596e4a977bbd483a", diff --git a/modules/nf-core/deepsomatic/main.nf b/modules/nf-core/deepsomatic/main.nf new file mode 100644 index 0000000000..fb5b8bc4eb --- /dev/null +++ b/modules/nf-core/deepsomatic/main.nf @@ -0,0 +1,71 @@ +process DEEPSOMATIC { + tag "$meta.id" + label 'process_high' + + container "docker.io/google/deepsomatic:1.7.0" + input: + tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) + tuple val(meta2), path(intervals) + tuple val(meta3), path(fasta) + tuple val(meta4), path(fai) + tuple val(meta5), path(gzi) + + 
output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: vcf_tbi + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions=${intervals}" : "" + def VERSION = '1.7.0' + + """ + run_deepsomatic \\ + --ref=${fasta} \\ + --reads_normal=${input_normal} \\ + --reads_tumor=${input_tumor} \\ + --output_vcf=${prefix}.vcf.gz \\ + --output_gvcf=${prefix}.g.vcf.gz \\ + --sample_name_tumor="tumor" \\ + --sample_name_normal="normal" \\ + ${args} \\ + ${regions} \\ + --intermediate_results_dir=tmp \\ + --num_shards=${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepsomatic: $VERSION + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.7.0' + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepsomatic: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepsomatic/meta.yml b/modules/nf-core/deepsomatic/meta.yml new file mode 100644 index 0000000000..434c0f5cc9 --- /dev/null +++ b/modules/nf-core/deepsomatic/meta.yml @@ -0,0 +1,128 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "deepsomatic" +description: DeepSomatic is an extension of deep learning-based variant caller DeepVariant + that takes aligned reads (in BAM or CRAM format) from tumor and normal data, produces pileup + image tensors from them, classifies each tensor using a convolutional neural network, and + finally reports somatic variants in a standard VCF or gVCF file. +keywords: + - variant calling + - machine learning + - neural network +tools: + - "deepsomatic": + description: "" + homepage: "https://github.com/google/deepsomatic" + documentation: "https://github.com/google/deepsomatic" + tool_dev_url: "https://github.com/google/deepsomatic" + doi: "10.1101/2024.08.16.608331" + licence: ["BSD-3-clause"] + identifier: "biotools:deepsomatic" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + - index_normal: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + - input_tumor: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + - index_tumor: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - intervals: + type: file + description: file containing intervals + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "${prefix}.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - vcf_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "${prefix}.vcf.gz.tbi": + type: file + description: Index of compressed VCF file + pattern: "*.vcf.gz.tbi" + - gvcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "${prefix}.g.vcf.gz": + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + - gvcf_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "${prefix}.g.vcf.gz.tbi": + type: file + description: Index of compressed Genotyped VCF file + pattern: "*.g.vcf.gz.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@vaxyzek" +maintainers: + - "@vaxyzek" diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test b/modules/nf-core/deepsomatic/tests/main.nf.test new file mode 100644 index 0000000000..eaa5f8fe6d --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process DEEPSOMATIC" + script "../main.nf" + process "DEEPSOMATIC" + + tag "modules" + tag "modules_nfcore" + tag "deepsomatic" + + test("tumor_normal_pair") { + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'tumor_vs_normal' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'intervals' ], + [] + ] + input[2] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + ] + input[4] = [ + [ id: 'gzi' ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { 
file(it[1]).getName() }, + process.out.vcf_tbi.collect { file(it[1]).getName() }, + process.out.versions, + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test.snap b/modules/nf-core/deepsomatic/tests/main.nf.test.snap new file mode 100644 index 0000000000..1a7886ec97 --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/main.nf.test.snap @@ -0,0 +1,20 @@ +{ + "tumor_normal_pair": { + "content": [ + [ + "tumor_vs_normal_out.vcf.gz" + ], + [ + "tumor_vs_normal_out.vcf.gz.tbi" + ], + [ + "versions.yml:md5,d64cbd049771dd1a8d0885499ea16f11" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T21:52:00.932502018" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepsomatic/tests/tags.yml b/modules/nf-core/deepsomatic/tests/tags.yml new file mode 100644 index 0000000000..a8bf10c9f3 --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/tags.yml @@ -0,0 +1,2 @@ +deepsomatic: + - "modules/nf-core/deepsomatic/**" diff --git a/nextflow.config b/nextflow.config index de95ae8c86..7bbd0170a2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -428,6 +428,7 @@ includeConfig 'conf/modules/ascat.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/controlfreec.config' includeConfig 'conf/modules/deepvariant.config' +includeConfig 'conf/modules/deepsomatic.config' includeConfig 'conf/modules/freebayes.config' includeConfig 'conf/modules/haplotypecaller.config' includeConfig 'conf/modules/joint_germline.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1611d58f40..7358a5b2e7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -112,7 +112,7 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, 
FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? 
[ meta + [ num_intervals:num_intervals ], cram_n, crai_n, cram_t, crai_t ]} + + // Convert [intervals, num_intervals] to [meta, intervals] with an empty meta. NOTE(review): this is passed to DEEPSOMATIC as a separate input channel from cram_normal_tumor_intervals - confirm the two stay paired item-for-item when there are several samples + intervals_only = intervals.map { intervals, num_intervals -> [[], intervals]} + + DEEPSOMATIC(cram_normal_tumor_intervals, intervals_only, fasta, fasta_fai, [ [ id:'null' ], [] ]) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_out = DEEPSOMATIC.out.vcf.branch{ + // Use meta.num_intervals to assess the number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more gvcf(s) from the same sample + gvcf_out = DEEPSOMATIC.out.gvcf.branch{ + // Use meta.num_intervals to assess the number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + gvcf_to_merge = gvcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + vcf_to_merge = vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + MERGE_DEEPSOMATIC_GVCF(gvcf_to_merge, dict) + MERGE_DEEPSOMATIC_VCF(vcf_to_merge, dict) + + gvcf = Channel.empty() + // Mix intervals and no_intervals channels together + gvcf = Channel.empty().mix(MERGE_DEEPSOMATIC_GVCF.out.vcf, gvcf_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepsomatic' ], vcf ] } + + vcf = Channel.empty() + // Mix intervals and no_intervals channels together + vcf = Channel.empty().mix(MERGE_DEEPSOMATIC_VCF.out.vcf, vcf_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepsomatic' ], vcf ] } + + versions = versions.mix(DEEPSOMATIC.out.versions) + // versions = 
versions.mix(MERGE_DEEPSOMATIC_GVCF.out.versions) + versions = versions.mix(MERGE_DEEPSOMATIC_VCF.out.versions) + + emit: + gvcf + vcf + versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index cdfabfc3ac..3f1449dcce 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -3,6 +3,7 @@ // include { BAM_VARIANT_CALLING_CNVKIT } from '../bam_variant_calling_cnvkit/main' +include { BAM_VARIANT_CALLING_DEEPSOMATIC } from '../bam_variant_calling_deepsomatic/main' include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main' include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_NORMAL } from '../bam_variant_calling_mpileup/main' include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_TUMOR } from '../bam_variant_calling_mpileup/main' @@ -127,6 +128,19 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) } + if (tools.split(',').contains('deepsomatic')) { + BAM_VARIANT_CALLING_DEEPSOMATIC( + // Remap channel to match module/subworkflow + cram, + dict, + fasta, + fasta_fai, + intervals + ) + + versions = versions.mix(BAM_VARIANT_CALLING_DEEPSOMATIC.out.versions) + } + // FREEBAYES if (tools.split(',').contains('freebayes')) { BAM_VARIANT_CALLING_FREEBAYES(