diff --git a/conf/modules/deepsomatic.config b/conf/modules/deepsomatic.config new file mode 100644 index 0000000000..a2ec130c1e --- /dev/null +++ b/conf/modules/deepsomatic.config @@ -0,0 +1,43 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// DEEPSOMATIC + +process { + + withName: 'DEEPSOMATIC' { + ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepsomatic" : "${meta.id}.deepsomatic.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('deepsomatic') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "deepsomatic/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_DEEPSOMATIC_.*' { + ext.prefix = { "${meta.id}.deepsomatic" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/deepsomatic/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_DEEPSOMATIC_GVCF' { + ext.prefix = { "${meta.id}.deepsomatic.g" } + } + +} diff --git a/modules/nf-core/deepsomatic/environment.yml b/modules/nf-core/deepsomatic/environment.yml new file mode 100644 index 0000000000..45ecb999e4 --- /dev/null +++ b/modules/nf-core/deepsomatic/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "deepsomatic" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "YOUR-TOOL-HERE" diff --git a/modules/nf-core/deepsomatic/main.nf b/modules/nf-core/deepsomatic/main.nf new file mode 100644 index 0000000000..3f676063f4 --- /dev/null +++ b/modules/nf-core/deepsomatic/main.nf @@ -0,0 +1,74 @@ +process DEEPSOMATIC { + tag "$meta.id" + label 'process_high' + + container "docker.io/google/deepsomatic:1.6.1" + + input: + tuple val(meta), path(input_normal), path(index_normal) + tuple val(meta2), path(input_tumor), path(index_tumor) + tuple val(meta3), path(intervals) + tuple val(meta4), path(fasta) + tuple val(meta5), path(fai) + tuple val(meta6), path(gzi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: vcf_tbi + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support 
Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions=${intervals}" : "" + def VERSION = '1.6.1' + + """ + run_deepsomatic \\ + --ref=${fasta} \\ + --reads_normal=${input_normal} \\ + --reads_tumor=${input_tumor} \\ + --output_vcf=${prefix}.vcf.gz \\ + --output_gvcf=${prefix}.g.vcf.gz \\ + --sample_name_tumor="tumor" \\ + --sample_name_normal="normal" \\ + ${args} \\ + ${regions} \\ + --intermediate_results_dir=tmp \\ + --num_shards=${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepsomatic: $VERSION + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + + def VERSION = '1.6.1' + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepsomatic: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepsomatic/meta.yml b/modules/nf-core/deepsomatic/meta.yml new file mode 100644 index 0000000000..ffbd58146c --- /dev/null +++ b/modules/nf-core/deepsomatic/meta.yml @@ -0,0 +1,76 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "deepsomatic" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - variant calling + - machine learning + - neural network +tools: + - "deepsomatic": + ## TODO nf-core: Add a description and other details for the software below + description: "" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: + +input: + - meta: + 
type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - input_normal: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + - index_normal: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + - input_tumor: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + - index_tumor: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + + - intervals: + type: file + description: Interval file for targeted regions + pattern: "*.bed" + + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@vaxyzek" +maintainers: + - "@vaxyzek" diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test b/modules/nf-core/deepsomatic/tests/main.nf.test new file mode 100644 index 0000000000..95f0510e62 --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/main.nf.test @@ -0,0 +1,73 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test deepsomatic +nextflow_process { + + name "Test Process DEEPSOMATIC" + script "../main.nf" + process "DEEPSOMATIC" + + tag "modules" + tag "modules_nfcore" + tag "deepsomatic" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam") { + + // TODO nf-core: If you have created a test for a 
chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. 
+ ) + } + + } + +} diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test.snap b/modules/nf-core/deepsomatic/tests/main.nf.test.snap new file mode 100644 index 0000000000..9e693386d2 --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "bam": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-21T17:53:00.524660985" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "bam": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-21T17:52:54.704070521" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepsomatic/tests/tags.yml b/modules/nf-core/deepsomatic/tests/tags.yml new file mode 100644 index 0000000000..a8bf10c9f3 --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/tags.yml @@ -0,0 +1,2 @@ +deepsomatic: + - "modules/nf-core/deepsomatic/**" diff --git a/nextflow.config b/nextflow.config index de95ae8c86..7bbd0170a2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -428,6 +428,7 @@ includeConfig 'conf/modules/ascat.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/controlfreec.config' includeConfig 'conf/modules/deepvariant.config' +includeConfig 'conf/modules/deepsomatic.config' includeConfig 'conf/modules/freebayes.config' includeConfig 'conf/modules/haplotypecaller.config' includeConfig 'conf/modules/joint_germline.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1611d58f40..7358a5b2e7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -112,7 +112,7 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can 
currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? 
[ meta + [ num_intervals:num_intervals ], cram, crai ]} + + // Convert [intervals, num_intervals] to [meta, intervals] with an empty meta + intervals_only = intervals.map { intervals, num_intervals -> [[], intervals]} + + DEEPSOMATIC(cram_normal_intervals, cram_tumor, intervals_only, fasta, fasta_fai, [ [ id:'null' ], [] ]) + + // // Figuring out if there is one or more vcf(s) from the same sample + // vcf_out = DEEPSOMATIC.out.vcf.branch{ + // // Use meta.num_intervals to asses number of intervals + // intervals: it[0].num_intervals > 1 + // no_intervals: it[0].num_intervals <= 1 + // } + + // // Figuring out if there is one or more gvcf(s) from the same sample + // gvcf_out = DEEPSOMATIC.out.gvcf.branch{ + // // Use meta.num_intervals to asses number of intervals + // intervals: it[0].num_intervals > 1 + // no_intervals: it[0].num_intervals <= 1 + // } + + // // Only when using intervals + // gvcf_to_merge = gvcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + // vcf_to_merge = vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + // MERGE_DEEPSOMATIC_GVCF(gvcf_to_merge, dict) + // MERGE_DEEPSOMATIC_VCF(vcf_to_merge, dict) + gvcf = Channel.empty() + // // Mix intervals and no_intervals channels together + // gvcf = Channel.empty().mix(MERGE_DEEPSOMATIC_GVCF.out.vcf, gvcf_out.no_intervals) + // // add variantcaller to meta map and remove no longer necessary field: num_intervals + // .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepsomatic' ], vcf ] } + + vcf = Channel.empty() + // // Mix intervals and no_intervals channels together + // vcf = Channel.empty().mix(MERGE_DEEPSOMATIC_VCF.out.vcf, vcf_out.no_intervals) + // // add variantcaller to meta map and remove no longer necessary field: num_intervals + // .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepsomatic' ], vcf ] } + + // versions = 
versions.mix(DEEPSOMATIC.out.versions) + // versions = versions.mix(MERGE_DEEPSOMATIC_GVCF.out.versions) + // versions = versions.mix(MERGE_DEEPSOMATIC_VCF.out.versions) + + emit: + gvcf + vcf + + versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index cdfabfc3ac..1dfde83da0 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -3,6 +3,7 @@ // include { BAM_VARIANT_CALLING_CNVKIT } from '../bam_variant_calling_cnvkit/main' +include { BAM_VARIANT_CALLING_DEEPSOMATIC } from '../bam_variant_calling_deepsomatic/main' include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main' include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_NORMAL } from '../bam_variant_calling_mpileup/main' include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_TUMOR } from '../bam_variant_calling_mpileup/main' @@ -127,6 +128,20 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) } + if (tools.split(',').contains('deepsomatic')) { + BAM_VARIANT_CALLING_DEEPSOMATIC( + // Remap channel to match module/subworkflow + cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai ] }, + cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, tumor_cram, tumor_crai ] }, + dict, + fasta, + fasta_fai, + intervals + ) + + versions = versions.mix(BAM_VARIANT_CALLING_DEEPSOMATIC.out.versions) + } + // FREEBAYES if (tools.split(',').contains('freebayes')) { BAM_VARIANT_CALLING_FREEBAYES(