diff --git a/conf/modules/vardictjava.config b/conf/modules/vardictjava.config new file mode 100644 index 0000000000..9133290a07 --- /dev/null +++ b/conf/modules/vardictjava.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +process{ + withName: 'VARDICTJAVA' { + ext.args = { "-c 1 -S 2 -E 3 -g 4 --nosv --deldupvar -Q 10 -F 0x700 -f 0.1 -N ${meta.sample}_${meta.patient}"} // + ext.args2 = { "-f 0.1 -A -N ${meta.sample}_${meta.patient}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.vardictjava" : "${meta.id}.vardictjava.${bed.simpleName}" } // `bed` is the interval-file input name declared by the VARDICTJAVA process + ext.when = { params.tools && params.tools.split(',').contains('vardictjava') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "vardictjava/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_VARDICTJAVA' { + ext.prefix = { "${meta.id}.vardictjava" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/vardictjava/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } +} diff --git a/modules.json b/modules.json index 26d801647b..f62cedf487 100644 --- a/modules.json +++ b/modules.json @@ -494,6 +494,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "vardictjava": { + "branch": "master", + "git_sha": "27e170816808aedbbac23f9a1f2c7488d4b6d342", + "installed_by": ["modules"] + }, "vcftools": { "branch": "master", "git_sha": "624ecdc43b72e0a45bf05d9b57215d18dcd538f8", diff --git a/modules/nf-core/vardictjava/environment.yml b/modules/nf-core/vardictjava/environment.yml new file mode 100644 index 0000000000..4e8905e52c --- /dev/null +++ b/modules/nf-core/vardictjava/environment.yml @@ -0,0 +1,8 @@ +name: vardictjava +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::htslib=1.20 + - bioconda::vardict-java=1.8.3 diff --git a/modules/nf-core/vardictjava/main.nf b/modules/nf-core/vardictjava/main.nf new file mode 100644 index 0000000000..6329391c83 --- /dev/null +++ b/modules/nf-core/vardictjava/main.nf @@ -0,0 +1,67 @@ +process VARDICTJAVA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:edd70e76f3529411a748168f6eb1a61f29702123-0' : + 'biocontainers/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:edd70e76f3529411a748168f6eb1a61f29702123-0' }" + + input: + tuple val(meta), path(bams), path(bais), path(bed) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '-c 1 -S 2 -E 3' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def somatic = bams instanceof List && bams.size() == 2 ? true : false + def input = somatic ? "-b \"${bams[0]}|${bams[1]}\"" : "-b ${bams}" + def filter = somatic ? "testsomatic.R" : "teststrandbias.R" + def convert_to_vcf = somatic ? "var2vcf_paired.pl" : "var2vcf_valid.pl" + """ + export JAVA_OPTS='"-Xms${task.memory.toMega()/4}m" "-Xmx${task.memory.toGiga()}g" "-Dsamjdk.reference_fasta=${fasta}"' + vardict-java \\ + ${args} \\ + ${input} \\ + -th ${task.cpus} \\ + -G ${fasta} \\ + ${bed} \\ + | ${filter} \\ + | ${convert_to_vcf} \\ + ${args2} \\ + | bgzip ${args3} --threads ${task.cpus} > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vardict-java: \$( realpath \$( command -v vardict-java ) | sed 's/.*java-//;s/-.*//' ) + var2vcf_valid.pl: \$( var2vcf_valid.pl -h | sed '2!d;s/.* //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '-c 1 -S 2 -E 3' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo '' | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vardict-java: \$( realpath \$( command -v vardict-java ) | sed 's/.*java-//;s/-.*//' ) + var2vcf_valid.pl: \$( var2vcf_valid.pl -h | sed '2!d;s/.* 
//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/vardictjava/meta.yml b/modules/nf-core/vardictjava/meta.yml new file mode 100644 index 0000000000..5c2fc921ac --- /dev/null +++ b/modules/nf-core/vardictjava/meta.yml @@ -0,0 +1,70 @@ +name: "vardictjava" +description: The Java port of the VarDict variant caller +keywords: + - variant calling + - vcf + - bam + - snv + - sv +tools: + - "vardictjava": + description: "Java port of the VarDict variant discovery program" + homepage: "https://github.com/AstraZeneca-NGS/VarDictJava" + documentation: "https://github.com/AstraZeneca-NGS/VarDictJava" + tool_dev_url: "https://github.com/AstraZeneca-NGS/VarDictJava" + doi: "10.1093/nar/gkw227 " + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bams: + type: file + description: One or two BAM files. Supply two BAM files to run Vardict in paired mode. + pattern: "*.bam" + - bais: + type: file + description: Index/indices of the BAM file(s) + pattern: "*.bai" + - bed: + type: file + description: BED with the regions of interest + pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA of the reference genome + pattern: "*.{fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test', single_end:false ] + - fasta_fai: + type: file + description: The index of the FASTA of the reference genome + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: VCF file output + pattern: "*.vcf.gz" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/vardictjava/tests/main.nf.test b/modules/nf-core/vardictjava/tests/main.nf.test new file mode 100644 index 0000000000..483a753a89 --- /dev/null +++ b/modules/nf-core/vardictjava/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process VARDICTJAVA" + script "../main.nf" + process "VARDICTJAVA" + tag "modules" + tag "modules_nfcore" + tag "vardictjava" + + test("homo_sapiens - [bam, bai, bed] - fasta - fai") { + + when { + params { + outdir = $outputDir + } + process { + """ + input[0] = Channel.value([ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ]) + input[1] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + + + } + + } + + test("homo_sapiens - [[bam, bam], [bai, bai], bed] - fasta - fai") { + + when { + params { + outdir = $outputDir + } + process { + """ + input[0] = Channel.value([ + [ id:'test' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ], + [ + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) + ], + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ]) + input[1] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + + + } + + } + +} diff --git a/modules/nf-core/vardictjava/tests/main.nf.test.snap b/modules/nf-core/vardictjava/tests/main.nf.test.snap new file mode 100644 index 0000000000..c32a68b742 --- /dev/null +++ b/modules/nf-core/vardictjava/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "homo_sapiens - [bam, bai, bed] - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,e8411ecae49b4f6afa6ea0b681ea506e" + ] + ], + "1": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,e8411ecae49b4f6afa6ea0b681ea506e" + ] + ], + "versions": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-04T19:08:38.328190023" + }, + "homo_sapiens - [[bam, bam], [bai, bai], bed] - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,b52c874c18be636d876d1e0df4a449c3" + ] + ], + "1": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,b52c874c18be636d876d1e0df4a449c3" + ] + ], + "versions": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + 
"timestamp": "2024-07-04T19:08:54.416661915" + } +} \ No newline at end of file diff --git a/modules/nf-core/vardictjava/tests/tags.yml b/modules/nf-core/vardictjava/tests/tags.yml new file mode 100644 index 0000000000..453c9b293a --- /dev/null +++ b/modules/nf-core/vardictjava/tests/tags.yml @@ -0,0 +1,2 @@ +vardictjava: + - modules/nf-core/vardictjava/** diff --git a/nextflow.config b/nextflow.config index f9fa756dbb..929f26ef90 100644 --- a/nextflow.config +++ b/nextflow.config @@ -442,6 +442,7 @@ includeConfig 'conf/modules/sentieon_haplotyper_joint_germline.config' includeConfig 'conf/modules/strelka.config' includeConfig 'conf/modules/tiddit.config' includeConfig 'conf/modules/post_variant_calling.config' +includeConfig 'conf/modules/vardictjava.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1611d58f40..04ba631301 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -111,8 +111,8 @@ "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", - "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant 
calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), VarDictJava and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", + "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vardictjava|vep)?,?)*(? 
+ bam: cram.extension == "bam" + cram: cram.extension == "cram"} + .set{ch_bam_from_cram} + + CRAM_TO_BAM( + ch_bam_from_cram.cram, + fasta, + fasta_fai + ) + + // Combine converted bam, bai and intervals + ch_bam_from_cram.bam + .mix(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true)) + .combine(intervals) + .map{meta, bam, bai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], bam, bai, intervals ]} + .set{ ch_vardict_input} + + VARDICTJAVA( + ch_vardict_input, + fasta, + fasta_fai + ) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf = VARDICTJAVA.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + MERGE_VARDICTJAVA(vcf_to_merge, dict) + + // Mix intervals and no_intervals channels together + vcf = Channel.empty().mix(MERGE_VARDICTJAVA.out.vcf, vcf.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'vardictjava' ], vcf ] } + + versions = versions.mix(VARDICTJAVA.out.versions) + versions = versions.mix(MERGE_VARDICTJAVA.out.versions) + + emit: + vcf + + versions +} diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index 90307f19c2..68955447d7 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -843,6 +843,7 @@ workflow SAREK { vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_strelka) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_tiddit) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_mpileup) + vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_vardictjava) 
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all)