From a1281273189812e7f026d4b73cfc10e82ac66e79 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 12:52:58 +0100 Subject: [PATCH 01/16] vardictjava module installed --- modules.json | 5 ++ modules/nf-core/vardictjava/environment.yml | 7 ++ modules/nf-core/vardictjava/main.nf | 65 ++++++++++++++ modules/nf-core/vardictjava/meta.yml | 70 +++++++++++++++ .../nf-core/vardictjava/tests/main.nf.test | 90 +++++++++++++++++++ .../vardictjava/tests/main.nf.test.snap | 68 ++++++++++++++ modules/nf-core/vardictjava/tests/tags.yml | 2 + 7 files changed, 307 insertions(+) create mode 100644 modules/nf-core/vardictjava/environment.yml create mode 100644 modules/nf-core/vardictjava/main.nf create mode 100644 modules/nf-core/vardictjava/meta.yml create mode 100644 modules/nf-core/vardictjava/tests/main.nf.test create mode 100644 modules/nf-core/vardictjava/tests/main.nf.test.snap create mode 100644 modules/nf-core/vardictjava/tests/tags.yml diff --git a/modules.json b/modules.json index a64bdb2c7d..3a9db78971 100644 --- a/modules.json +++ b/modules.json @@ -479,6 +479,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "vardictjava": { + "branch": "master", + "git_sha": "a0fdd948e3e59c8e020fc9d94cb54e0da143c07c", + "installed_by": ["modules"] + }, "vcftools": { "branch": "master", "git_sha": "624ecdc43b72e0a45bf05d9b57215d18dcd538f8", diff --git a/modules/nf-core/vardictjava/environment.yml b/modules/nf-core/vardictjava/environment.yml new file mode 100644 index 0000000000..92d169fd8b --- /dev/null +++ b/modules/nf-core/vardictjava/environment.yml @@ -0,0 +1,7 @@ +name: vardictjava +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::vardict-java=1.8.3 diff --git a/modules/nf-core/vardictjava/main.nf b/modules/nf-core/vardictjava/main.nf new file mode 100644 index 0000000000..17e2aff772 --- /dev/null +++ b/modules/nf-core/vardictjava/main.nf @@ -0,0 +1,65 @@ +process VARDICTJAVA { + tag 
"$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vardict-java:1.8.3--hdfd78af_0': + 'biocontainers/vardict-java:1.8.3--hdfd78af_0' }" + + input: + tuple val(meta), path(bams), path(bais), path(bed) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + + output: + tuple val(meta), path("*.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '-c 1 -S 2 -E 3' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def somatic = bams instanceof List && bams.size() == 2 ? true : false + def input = somatic ? "-b \"${bams[0]}|${bams[1]}\"" : "-b ${bams}" + def filter = somatic ? "testsomatic.R" : "teststrandbias.R" + def convert_to_vcf = somatic ? "var2vcf_paired.pl" : "var2vcf_valid.pl" + """ + export JAVA_OPTS='"-Xms${task.memory.toMega()/4}m" "-Xmx${task.memory.toGiga()}g" "-Dsamjdk.reference_fasta=${fasta}"' + vardict-java \\ + ${args} \\ + ${input} \\ + -th ${task.cpus} \\ + -G ${fasta} \\ + ${bed} \\ + | ${filter} \\ + | ${convert_to_vcf} \\ + ${args2} \\ + > ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vardict-java: \$( realpath \$( command -v vardict-java ) | sed 's/.*java-//;s/-.*//' ) + var2vcf_valid.pl: \$( var2vcf_valid.pl -h | sed '2!d;s/.* //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '-c 1 -S 2 -E 3' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vardict-java: \$( realpath \$( command -v vardict-java ) | sed 's/.*java-//;s/-.*//' ) + var2vcf_valid.pl: \$( var2vcf_valid.pl -h | sed '2!d;s/.* //' ) + END_VERSIONS + """ +} diff --git 
a/modules/nf-core/vardictjava/meta.yml b/modules/nf-core/vardictjava/meta.yml new file mode 100644 index 0000000000..5c2fc921ac --- /dev/null +++ b/modules/nf-core/vardictjava/meta.yml @@ -0,0 +1,70 @@ +name: "vardictjava" +description: The Java port of the VarDict variant caller +keywords: + - variant calling + - vcf + - bam + - snv + - sv +tools: + - "vardictjava": + description: "Java port of the VarDict variant discovery program" + homepage: "https://github.com/AstraZeneca-NGS/VarDictJava" + documentation: "https://github.com/AstraZeneca-NGS/VarDictJava" + tool_dev_url: "https://github.com/AstraZeneca-NGS/VarDictJava" + doi: "10.1093/nar/gkw227 " + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bams: + type: file + description: One or two BAM files. Supply two BAM files to run Vardict in paired mode. + pattern: "*.bam" + - bais: + type: file + description: Index/indices of the BAM file(s) + pattern: "*.bai" + - bed: + type: file + description: BED with the regions of interest + pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA of the reference genome + pattern: "*.{fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test', single_end:false ] + - fasta_fai: + type: file + description: The index of the FASTA of the reference genome + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: VCF file output + pattern: "*.vcf.gz" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/vardictjava/tests/main.nf.test b/modules/nf-core/vardictjava/tests/main.nf.test new file mode 100644 index 0000000000..483a753a89 --- /dev/null +++ b/modules/nf-core/vardictjava/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process VARDICTJAVA" + script "../main.nf" + process "VARDICTJAVA" + tag "modules" + tag "modules_nfcore" + tag "vardictjava" + + test("homo_sapiens - [bam, bai, bed] - fasta - fai") { + + when { + params { + outdir = $outputDir + } + process { + """ + input[0] = Channel.value([ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ]) + input[1] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + + + } + + } + + test("homo_sapiens - [[bam, bam], [bai, bai], bed] - fasta - fai") { + + when { + params { + outdir = $outputDir + } + process { + """ + input[0] = Channel.value([ + [ id:'test' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ], + [ + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) + ], + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ]) + input[1] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [ + [id:"ref"], + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + + + } + + } + +} diff --git a/modules/nf-core/vardictjava/tests/main.nf.test.snap b/modules/nf-core/vardictjava/tests/main.nf.test.snap new file mode 100644 index 0000000000..c04abcec13 --- /dev/null +++ b/modules/nf-core/vardictjava/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "homo_sapiens - [bam, bai, bed] - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,e8411ecae49b4f6afa6ea0b681ea506e" + ] + ], + "1": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,e8411ecae49b4f6afa6ea0b681ea506e" + ] + ], + "versions": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2023-10-24T09:24:20.923172332" + }, + "homo_sapiens - [[bam, bam], [bai, bai], bed] - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,b52c874c18be636d876d1e0df4a449c3" + ] + ], + "1": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,b52c874c18be636d876d1e0df4a449c3" + ] + ], + "versions": [ + "versions.yml:md5,6bf7aa0cbaac4a6e2acab2c475ec2389" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-05-15T05:05:37.320114013" + } +} \ No newline at end of file diff --git a/modules/nf-core/vardictjava/tests/tags.yml b/modules/nf-core/vardictjava/tests/tags.yml new file mode 100644 index 0000000000..453c9b293a --- /dev/null +++ b/modules/nf-core/vardictjava/tests/tags.yml @@ -0,0 +1,2 @@ +vardictjava: + - modules/nf-core/vardictjava/** From 4bd6abb8eb8bb44140cf3afaf30c75eddf471085 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 13:04:42 +0100 Subject: [PATCH 02/16] created vardictjava config --- conf/modules/vardictjava.config | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 conf/modules/vardictjava.config diff --git a/conf/modules/vardictjava.config b/conf/modules/vardictjava.config new file mode 100644 index 0000000000..9133290a07 --- /dev/null +++ b/conf/modules/vardictjava.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +process{ + withName: 'VARDICTJAVA' { + ext.args = { "-c 1 -S 2 -E 3 -g 4 --nosv --deldupvar -Q 10 -F 0x700 -f 0.1 -N ${meta.sample}_${meta.patient}"} // + ext.args2 = { "-f 0.1 -A -N ${meta.sample}_${meta.patient}" } + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.vardictjava" : "${meta.id}.vardictjava.${bed.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('vardictjava') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "vardictjava/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_VARDICTJAVA' { + ext.prefix = { "${meta.id}.vardictjava" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/vardictjava/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } +} From cb5e9d9b4029eb03ac81ed8c73e32300f268d790 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 13:11:02 +0100 Subject: [PATCH 03/16] vardictjava subworkflow --- subworkflows/local/bam_variant_calling_single_vardictjava/main.nf | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 subworkflows/local/bam_variant_calling_single_vardictjava/main.nf diff --git a/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf b/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf new file mode 100644 index 0000000000..e69de29bb2 From fa32ec460dc0948ba4af0b9bb9caa3873c42f285 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 13:19:06 +0100 Subject: [PATCH 04/16] created workflow logic --- .../main.nf | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf b/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf index e69de29bb2..5f8dd1d816 100644 --- a/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf +++ b/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf @@ -0,0 +1,72 @@ +// +// Vardictjava germline calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { VARDICTJAVA } 
from '../../../modules/nf-core/vardictjava/main' +include { GATK4_MERGEVCFS as MERGE_VARDICTJAVA } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../../modules/nf-core/samtools/convert/main' + +workflow BAM_VARIANT_CALLING_SINGLE_VARDICTJAVA { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + dict // channel: [optional] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Convert cram to bam + cram + .branch {meta, cram, crai -> + bam: cram.extension == "bam" + cram: cram.extension == "cram"} + .set{ch_bam_from_cram} + + CRAM_TO_BAM( + ch_bam_from_cram.cram, + fasta, + fasta_fai + ) + + // Combine converted bam, bai and intervals + ch_bam_from_cram.bam + .mix(CRAM_TO_BAM.out.alignment_index) + .combine(intervals) + .map{meta, bam, bai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], bam, bai, intervals ]} + .set{ ch_vardict_input} + + VARDICTJAVA( + ch_vardict_input, + fasta.map{fasta -> [[id:fasta.baseName], fasta]}, + fasta_fai.map{fasta_fai -> [[id:fasta_fai.baseName], fasta_fai]} + ) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf = VARDICTJAVA.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + MERGE_VARDICTJAVA(vcf_to_merge, dict) + + // Mix intervals and no_intervals channels together + vcf = Channel.empty().mix(MERGE_VARDICTJAVA.out.vcf, vcf.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - 
meta.subMap('num_intervals') + [ variantcaller:'vardictjava' ], vcf ] } + + versions = versions.mix(VARDICTJAVA.out.versions) + versions = versions.mix(MERGE_VARDICTJAVA.out.versions) + + emit: + vcf + + versions +} From a196521a975ba6505059f11f97823bcfb9c2fba5 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 13:26:33 +0100 Subject: [PATCH 05/16] vardictjava added to germline all --- .../bam_variant_calling_germline_all/main.nf | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 79efd8bf94..1cc8b4d641 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -13,6 +13,7 @@ include { BAM_VARIANT_CALLING_SENTIEON_DNASCOPE include { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER } from '../bam_variant_calling_sentieon_haplotyper/main' include { BAM_VARIANT_CALLING_MPILEUP } from '../bam_variant_calling_mpileup/main' include { BAM_VARIANT_CALLING_SINGLE_STRELKA } from '../bam_variant_calling_single_strelka/main' +include { BAM_VARIANT_CALLING_SINGLE_VARDICTJAVA } from '../bam_variant_calling_single_vardictjava/main' include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit/main' include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' @@ -66,6 +67,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_sentieon_haplotyper = Channel.empty() vcf_strelka = Channel.empty() vcf_tiddit = Channel.empty() + vcf_vardictjava = Channel.empty() // BCFTOOLS MPILEUP if (tools.split(',').contains('mpileup')) { @@ -327,6 +329,20 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_STRELKA.out.versions) } + // VARDICTJAVA + if 
(tools.split(',').contains('vardictjava')) { + BAM_VARIANT_CALLING_SINGLE_VARDICTJAVA( + cram, + dict, + fasta, // TODO CHECK Do I need to remap fasta and fasta_fai to match module? + fasta_fai, + intervals + ) + + vcf_vardictjava = BAM_VARIANT_CALLING_SINGLE_VARDICTJAVA.out.vcf + versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_VARDICTJAVA.out.versions) + } + // TIDDIT if (tools.split(',').contains('tiddit')) { BAM_VARIANT_CALLING_SINGLE_TIDDIT( @@ -349,7 +365,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_mpileup, vcf_sentieon_haplotyper, vcf_strelka, - vcf_tiddit + vcf_tiddit, + vcf_vardictjava ) emit: @@ -365,6 +382,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_sentieon_dnascope vcf_sentieon_haplotyper vcf_tiddit + vcf_vardictjava versions } From fd8d68e7a13cc14acf7e69da083608b676b96410 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 13:37:08 +0100 Subject: [PATCH 06/16] update nextflow config --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index f26930b9d1..efb8c72f1a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -442,6 +442,7 @@ includeConfig 'conf/modules/sentieon_haplotyper_joint_germline.config' includeConfig 'conf/modules/strelka.config' includeConfig 'conf/modules/tiddit.config' includeConfig 'conf/modules/post_variant_calling.config' +includeConfig 'conf/modules/vardictjava.config' //annotate includeConfig 'conf/modules/annotate.config' From 9f288c6f0ea46e05fd06208e31158daac91228cd Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 13:40:14 +0100 Subject: [PATCH 07/16] nextflow schema updated --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7545b9930e..b9f17237ba 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -111,8 +111,8 @@ "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, 
variant calling and/or for annotation.", - "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? 
**NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", + "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|vardictjava|tiddit|vep)?,?)*(? Date: Wed, 3 Jul 2024 13:44:22 +0100 Subject: [PATCH 08/16] vcf_vardictjava added to main --- workflows/sarek/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index 164a44f45e..a9effd391f 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -807,6 +807,7 @@ workflow SAREK { vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_strelka) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_tiddit) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_mpileup) + vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_vardictjava) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all) From d68b73de006dce6f325bd09a33d1e4e40583dbea Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 14:11:33 +0100 Subject: [PATCH 09/16] mulled container vardictjava htslib --- modules/nf-core/vardictjava/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/vardictjava/main.nf b/modules/nf-core/vardictjava/main.nf index 17e2aff772..fb0dccdcf7 100644 --- a/modules/nf-core/vardictjava/main.nf +++ b/modules/nf-core/vardictjava/main.nf @@ -4,8 +4,8 @@ 
process VARDICTJAVA { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vardict-java:1.8.3--hdfd78af_0': - 'biocontainers/vardict-java:1.8.3--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:6060dd7502a5e03e6a1f777c60ec85e4f7f58ec5-0': + 'biocontainers/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:6060dd7502a5e03e6a1f777c60ec85e4f7f58ec5-0' }" input: tuple val(meta), path(bams), path(bais), path(bed) From f46b69da1e4662115edd0c1f122d32198d8979e0 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 14:14:02 +0100 Subject: [PATCH 10/16] updated vardictjava module --- modules/nf-core/vardictjava/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/vardictjava/main.nf b/modules/nf-core/vardictjava/main.nf index fb0dccdcf7..6a1b3705db 100644 --- a/modules/nf-core/vardictjava/main.nf +++ b/modules/nf-core/vardictjava/main.nf @@ -13,7 +13,7 @@ process VARDICTJAVA { tuple val(meta3), path(fasta_fai) output: - tuple val(meta), path("*.vcf"), emit: vcf + tuple val(meta), path("*.vcf.gz"), emit: vcf path "versions.yml" , emit: versions when: @@ -40,6 +40,7 @@ process VARDICTJAVA { | ${convert_to_vcf} \\ ${args2} \\ > ${prefix}.vcf + bgzip ${prefix}.vcf cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -54,7 +55,7 @@ process VARDICTJAVA { def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.vcf + touch ${prefix}.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": From 1ae54bd7b2b3405fa2f40470f34574ce8961a552 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 16:24:44 +0100 Subject: [PATCH 11/16] replacing alignment index --- .../local/bam_variant_calling_single_vardictjava/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf b/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf index 5f8dd1d816..ba87184463 100644 --- a/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf +++ b/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf @@ -34,7 +34,7 @@ workflow BAM_VARIANT_CALLING_SINGLE_VARDICTJAVA { // Combine converted bam, bai and intervals ch_bam_from_cram.bam - .mix(CRAM_TO_BAM.out.alignment_index) + .mix(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true)) .combine(intervals) .map{meta, bam, bai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], bam, bai, intervals ]} .set{ ch_vardict_input} From 66a2c285f0f80a6bf47e287974393096b8fdde73 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 16:30:58 +0100 Subject: [PATCH 12/16] vardictjava-htslib container --- modules/nf-core/vardictjava/environment.yml | 1 + modules/nf-core/vardictjava/main.nf | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/vardictjava/environment.yml b/modules/nf-core/vardictjava/environment.yml index 92d169fd8b..4e8905e52c 100644 --- a/modules/nf-core/vardictjava/environment.yml +++ b/modules/nf-core/vardictjava/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: + - bioconda::htslib=1.20 - bioconda::vardict-java=1.8.3 diff --git a/modules/nf-core/vardictjava/main.nf b/modules/nf-core/vardictjava/main.nf index 6a1b3705db..5c52506ece 100644 --- a/modules/nf-core/vardictjava/main.nf +++ b/modules/nf-core/vardictjava/main.nf @@ -3,9 +3,7 @@ process VARDICTJAVA { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:6060dd7502a5e03e6a1f777c60ec85e4f7f58ec5-0': - 'biocontainers/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:6060dd7502a5e03e6a1f777c60ec85e4f7f58ec5-0' }" + container "community.wave.seqera.io/library/htslib_vardict-java:d0da881a1909bfa9" input: tuple val(meta), path(bams), path(bais), path(bed) From 108845eae07da14fe0cc522126002342edb4feea Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Wed, 3 Jul 2024 17:05:04 +0100 Subject: [PATCH 13/16] removed meta map --- .../local/bam_variant_calling_single_vardictjava/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf b/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf index ba87184463..3ab2b4d3a3 100644 --- a/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf +++ b/subworkflows/local/bam_variant_calling_single_vardictjava/main.nf @@ -41,8 +41,8 @@ workflow BAM_VARIANT_CALLING_SINGLE_VARDICTJAVA { VARDICTJAVA( ch_vardict_input, - fasta.map{fasta -> [[id:fasta.baseName], fasta]}, - fasta_fai.map{fasta_fai -> [[id:fasta_fai.baseName], fasta_fai]} + fasta, + fasta_fai ) // Figuring out if there is one or more vcf(s) from the same sample From 1d048d69de9b1552d510a0dbbda509db5f1ee905 Mon Sep 17 00:00:00 2001 From: eolaniru <149598276+eolaniru@users.noreply.github.com> Date: Thu, 4 Jul 2024 14:48:25 +0100 Subject: [PATCH 14/16] mulled biocontainer added --- modules/nf-core/vardictjava/main.nf | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/vardictjava/main.nf b/modules/nf-core/vardictjava/main.nf index 5c52506ece..6329391c83 100644 --- a/modules/nf-core/vardictjava/main.nf +++ b/modules/nf-core/vardictjava/main.nf @@ -3,7 +3,9 @@ process VARDICTJAVA { label 'process_high' conda "${moduleDir}/environment.yml" - container 
"community.wave.seqera.io/library/htslib_vardict-java:d0da881a1909bfa9" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:edd70e76f3529411a748168f6eb1a61f29702123-0' : + 'biocontainers/mulled-v2-731b8c4cf44d76e9aa181af565b9eee448d82a8c:edd70e76f3529411a748168f6eb1a61f29702123-0' }" input: tuple val(meta), path(bams), path(bais), path(bed) @@ -20,6 +22,7 @@ process VARDICTJAVA { script: def args = task.ext.args ?: '-c 1 -S 2 -E 3' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def somatic = bams instanceof List && bams.size() == 2 ? true : false @@ -37,8 +40,7 @@ process VARDICTJAVA { | ${filter} \\ | ${convert_to_vcf} \\ ${args2} \\ - > ${prefix}.vcf - bgzip ${prefix}.vcf + | bgzip ${args3} --threads ${task.cpus} > ${prefix}.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -50,10 +52,11 @@ process VARDICTJAVA { stub: def args = task.ext.args ?: '-c 1 -S 2 -E 3' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.vcf.gz + echo '' | gzip > ${prefix}.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": From 7bf9e602e08be01abd54084ab940b1f27a0a7fbd Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Fri, 5 Jul 2024 09:46:01 +0100 Subject: [PATCH 15/16] update test snap --- modules/nf-core/vardictjava/tests/main.nf.test | 2 +- .../nf-core/vardictjava/tests/main.nf.test.snap | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/nf-core/vardictjava/tests/main.nf.test b/modules/nf-core/vardictjava/tests/main.nf.test index 483a753a89..039db2cc0f 100644 --- a/modules/nf-core/vardictjava/tests/main.nf.test +++ b/modules/nf-core/vardictjava/tests/main.nf.test @@ -87,4 +87,4 @@ nextflow_process { } -} +} \ No newline at 
end of file diff --git a/modules/nf-core/vardictjava/tests/main.nf.test.snap b/modules/nf-core/vardictjava/tests/main.nf.test.snap index c04abcec13..c32a68b742 100644 --- a/modules/nf-core/vardictjava/tests/main.nf.test.snap +++ b/modules/nf-core/vardictjava/tests/main.nf.test.snap @@ -7,7 +7,7 @@ { "id": "test" }, - "test.vcf:md5,e8411ecae49b4f6afa6ea0b681ea506e" + "test.vcf.gz:md5,e8411ecae49b4f6afa6ea0b681ea506e" ] ], "1": [ @@ -18,7 +18,7 @@ { "id": "test" }, - "test.vcf:md5,e8411ecae49b4f6afa6ea0b681ea506e" + "test.vcf.gz:md5,e8411ecae49b4f6afa6ea0b681ea506e" ] ], "versions": [ @@ -28,9 +28,9 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2023-10-24T09:24:20.923172332" + "timestamp": "2024-07-04T19:08:38.328190023" }, "homo_sapiens - [[bam, bam], [bai, bai], bed] - fasta - fai": { "content": [ @@ -40,7 +40,7 @@ { "id": "test" }, - "test.vcf:md5,b52c874c18be636d876d1e0df4a449c3" + "test.vcf.gz:md5,b52c874c18be636d876d1e0df4a449c3" ] ], "1": [ @@ -51,7 +51,7 @@ { "id": "test" }, - "test.vcf:md5,b52c874c18be636d876d1e0df4a449c3" + "test.vcf.gz:md5,b52c874c18be636d876d1e0df4a449c3" ] ], "versions": [ @@ -61,8 +61,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-05-15T05:05:37.320114013" + "timestamp": "2024-07-04T19:08:54.416661915" } } \ No newline at end of file From 92b767514f2cb347731c318ac267636d3c9eb342 Mon Sep 17 00:00:00 2001 From: Edward Olaniru Date: Mon, 15 Jul 2024 16:01:10 +0100 Subject: [PATCH 16/16] updated vardictjava module to match remote --- modules.json | 2 +- modules/nf-core/vardictjava/tests/main.nf.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 28d3c197bd..351ad1fe3e 100644 --- a/modules.json +++ b/modules.json @@ -496,7 +496,7 @@ }, "vardictjava": { "branch": "master", - "git_sha": "a0fdd948e3e59c8e020fc9d94cb54e0da143c07c", + "git_sha": 
"27e170816808aedbbac23f9a1f2c7488d4b6d342", "installed_by": ["modules"] }, "vcftools": { diff --git a/modules/nf-core/vardictjava/tests/main.nf.test b/modules/nf-core/vardictjava/tests/main.nf.test index 039db2cc0f..483a753a89 100644 --- a/modules/nf-core/vardictjava/tests/main.nf.test +++ b/modules/nf-core/vardictjava/tests/main.nf.test @@ -87,4 +87,4 @@ nextflow_process { } -} \ No newline at end of file +}