From 37d46d206d254f15cb254f47a114efc1d558abc1 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Thu, 11 Jul 2024 11:53:50 +0200 Subject: [PATCH 01/18] dysgu_added --- modules.json | 5 + modules/nf-core/dysgu/environment.yml | 7 ++ modules/nf-core/dysgu/main.nf | 55 +++++++++++ modules/nf-core/dysgu/meta.yml | 68 +++++++++++++ modules/nf-core/dysgu/tests/main.nf.test | 96 +++++++++++++++++++ modules/nf-core/dysgu/tests/main.nf.test.snap | 75 +++++++++++++++ modules/nf-core/dysgu/tests/nextflow.config | 5 + modules/nf-core/dysgu/tests/tags.yml | 2 + 8 files changed, 313 insertions(+) create mode 100644 modules/nf-core/dysgu/environment.yml create mode 100644 modules/nf-core/dysgu/main.nf create mode 100644 modules/nf-core/dysgu/meta.yml create mode 100644 modules/nf-core/dysgu/tests/main.nf.test create mode 100644 modules/nf-core/dysgu/tests/main.nf.test.snap create mode 100644 modules/nf-core/dysgu/tests/nextflow.config create mode 100644 modules/nf-core/dysgu/tests/tags.yml diff --git a/modules.json b/modules.json index a64bdb2c7d..d64d140459 100644 --- a/modules.json +++ b/modules.json @@ -129,6 +129,11 @@ "installed_by": ["modules"], "patch": "modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff" }, + "dysgu": { + "branch": "master", + "git_sha": "c27498285a0beca2239b395cf88129c586a837fc", + "installed_by": ["modules"] + }, "ensemblvep/download": { "branch": "master", "git_sha": "3db4f8488315cd7d7cf3fcb64251f6603210e831", diff --git a/modules/nf-core/dysgu/environment.yml b/modules/nf-core/dysgu/environment.yml new file mode 100644 index 0000000000..5efb6db61a --- /dev/null +++ b/modules/nf-core/dysgu/environment.yml @@ -0,0 +1,7 @@ +name: dysgu +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - dysgu diff --git a/modules/nf-core/dysgu/main.nf b/modules/nf-core/dysgu/main.nf new file mode 100644 index 0000000000..6aa1deea95 --- /dev/null +++ b/modules/nf-core/dysgu/main.nf @@ -0,0 +1,55 @@ +process DYSGU { + tag "$meta.id" + label 
'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/dysgu:48830f55112c399e': + 'community.wave.seqera.io/library/dysgu:faf71ac972284412' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path('*.vcf.gz') , emit: vcf + tuple val(meta), path('*.vcf.gz.tbi') , emit: tbi + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + dysgu run \\ + -p ${task.cpus} \\ + -x \\ + $fasta \\ + . \\ + $input \\ + | bgzip ${args2} --threads ${task.cpus} --stdout > ${prefix}.vcf.gz + tabix ${args3} ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dysgu: \$(dysgu --version 2>&1) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dysgu: \$(dysgu --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dysgu/meta.yml b/modules/nf-core/dysgu/meta.yml new file mode 100644 index 0000000000..67c6c70abf --- /dev/null +++ b/modules/nf-core/dysgu/meta.yml @@ -0,0 +1,68 @@ +name: dysgu + +description: Dysgu calls structural variants (SVs) from mapped sequencing reads. It is designed for accurate and efficient detection of structural variations. 
+keywords: + - structural variants + - sv + - vcf +tools: + - dysgu: + description: Structural variant caller for mapped sequencing data + homepage: https://github.com/kcleal/dysgu + documentation: https://github.com/kcleal/dysgu/blob/master/README.rst + tool_dev_url: https://github.com/kcleal/dysgu + doi: "10.1093/nar/gkac039" + licence: ["GPL-3.0-or-later"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - input: + type: file + description: Input BAM file + pattern: "*.bam" + - index: + type: file + description: BAM index file + pattern: "*.bai" + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1' ] + - vcf: + type: file + description: VCF file with identified structural variants + pattern: "*.{vcf.gz}" + - tbi: + type: file + description: The index of the BCF/VCF file + pattern: "*.{vcf.gz.tbi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@famosab" + - "@poddarharsh15" +maintainers: + - "@poddarharsh15" diff --git a/modules/nf-core/dysgu/tests/main.nf.test b/modules/nf-core/dysgu/tests/main.nf.test new file mode 100644 index 0000000000..1714b5b7ab --- /dev/null +++ b/modules/nf-core/dysgu/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process DYSGU" + script "../main.nf" + process "DYSGU" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "dysgu" + + + test("human - bam") { + + when { + process { + """ + input[0] = [ [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ [ id:'reference'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } + ) + } + + } + + + test("human - cram") { + + when { + process { + """ + input[0] = [ [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ] + input[1] = [ [ id:'reference'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } + ) + } + + } + + + test("human - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ [ id:'reference'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/dysgu/tests/main.nf.test.snap b/modules/nf-core/dysgu/tests/main.nf.test.snap new file mode 100644 index 0000000000..ecf725f5d3 --- /dev/null +++ b/modules/nf-core/dysgu/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "human - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + 
"test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,cf1e0487502108690603dd16f034bf5e" + ], + "tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,cf1e0487502108690603dd16f034bf5e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-08T13:18:45.660262" + }, + "human - bam": { + "content": [ + [ + "versions.yml:md5,cf1e0487502108690603dd16f034bf5e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-08T13:19:56.62312" + }, + "human - cram": { + "content": [ + [ + "versions.yml:md5,cf1e0487502108690603dd16f034bf5e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-08T13:20:04.494134" + } +} \ No newline at end of file diff --git a/modules/nf-core/dysgu/tests/nextflow.config b/modules/nf-core/dysgu/tests/nextflow.config new file mode 100644 index 0000000000..5336ab55cf --- /dev/null +++ b/modules/nf-core/dysgu/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: DYSGU { + ext.args = '--exome ' + } +} \ No newline at end of file diff --git a/modules/nf-core/dysgu/tests/tags.yml b/modules/nf-core/dysgu/tests/tags.yml new file mode 100644 index 0000000000..6bffc95e91 --- /dev/null +++ b/modules/nf-core/dysgu/tests/tags.yml @@ -0,0 +1,2 @@ +dysgu: + - "modules/nf-core/dysgu/**" From c6cdd2dc4516e2a8342cd09f063d2d16aa708761 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Thu, 11 Jul 2024 13:49:11 +0200 Subject: [PATCH 02/18] dysgu_main.nf_update --- .../local/bam_variant_calling_dysgu/main.nf | 43 +++++++++++++++++++ .../bam_variant_calling_germline_all/main.nf | 1 + 2 files changed, 44 insertions(+) create mode 100644 subworkflows/local/bam_variant_calling_dysgu/main.nf diff --git 
a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf new file mode 100644 index 0000000000..60ed61eb69 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf @@ -0,0 +1,43 @@ +// +// dysgu variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { DYSGU } from '../../../modules/nf-core/dysgu/main' + +// Seems to be the consensus on upstream modules implementation too +workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi] or [ [], []] if no intervals; intervals file contains all intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + bed_gz = it.size() > 3 ? it[3] : [] + bed_tbi = it.size() > 3 ? 
it[4] : [] + + [it[0], it[1], it[2], bed_gz, bed_tbi] + } + + DYSGU(cram_intervals, fasta, fasta_fai, []) + + + dysgu_vcf = DYSGU.out.vcf + + // Only dysgu SV should get annotated + // add variantcaller to meta map + vcf = dysgu_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'dysgu' ], vcf ] } + + versions = versions.mix(DYSGU.out.versions) + + emit: + vcf + + versions +} diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 79efd8bf94..2733ea1bd9 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -17,6 +17,7 @@ include { BAM_VARIANT_CALLING_SINGLE_TIDDIT include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' include { VCF_VARIANT_FILTERING_GATK as SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' +include { BAM_VARIANT_CALLING_DYSGU } from '../bam_variant_calling_dysgu/main' From ab5af46cec4c76fb452ed87579375d288b4f88a2 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Thu, 11 Jul 2024 13:57:32 +0200 Subject: [PATCH 03/18] adding_dysgu_geramline_main --- .../bam_variant_calling_germline_all/main.nf | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 2733ea1bd9..5e03c492db 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -67,6 +67,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_sentieon_haplotyper = Channel.empty() vcf_strelka = Channel.empty() vcf_tiddit = Channel.empty() + vcf_dysgu = Channel.empty() // BCFTOOLS MPILEUP if (tools.split(',').contains('mpileup')) { @@ -191,6 
+192,18 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions) } + // DYSGU + if (tools.split(',').contains('dysgu')) { + BAM_VARIANT_CALLING_DYSGU ( + cram, + fasta, + fasta_fai + ) + + vcf_dysgu = BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.vcf + versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.versions) + } + // SENTIEON DNASCOPE if (tools.split(',').contains('sentieon_dnascope')) { BAM_VARIANT_CALLING_SENTIEON_DNASCOPE( @@ -350,7 +363,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_mpileup, vcf_sentieon_haplotyper, vcf_strelka, - vcf_tiddit + vcf_tiddit, + vcf_dysgu ) emit: @@ -366,6 +380,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_sentieon_dnascope vcf_sentieon_haplotyper vcf_tiddit + vcf_dysgu versions } From 327d99b780e70e293059d6e939ac8f7b35829604 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Thu, 11 Jul 2024 14:00:20 +0200 Subject: [PATCH 04/18] dysgu.config_update --- conf/modules/dysgu.config | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 conf/modules/dysgu.config diff --git a/conf/modules/dysgu.config b/conf/modules/dysgu.config new file mode 100644 index 0000000000..8af87c5a70 --- /dev/null +++ b/conf/modules/dysgu.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// DYSGU + +process { + if (params.tools && params.tools.split(',').contains('dysgu')) { + withName: 'DYSGU_GERMLINE' { + ext.args = { params.wes ? "--exome" : '' } + ext.prefix = { "${meta.id}.dysgu" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/dysgu/${meta.id}" }, + pattern: "*{dysgu}.{vcf.gz,vcf.gz.tbi}" + ] + } + } +} From 40b31094794ee47cdb9fe370ba1e0f561d5613ce Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Thu, 11 Jul 2024 14:45:36 +0200 Subject: [PATCH 05/18] dysgu_main.nf_update --- subworkflows/local/bam_variant_calling_dysgu/main.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf index 60ed61eb69..d783e4102e 100644 --- a/subworkflows/local/bam_variant_calling_dysgu/main.nf +++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf @@ -18,7 +18,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU { versions = Channel.empty() // Combine cram and intervals, account for 0 intervals - cram_intervals = cram.combine(intervals).map{ it -> + cram_intervals = cram.combine(intervals).map { it -> bed_gz = it.size() > 3 ? it[3] : [] bed_tbi = it.size() > 3 ? 
it[4] : [] @@ -27,12 +27,11 @@ workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU { DYSGU(cram_intervals, fasta, fasta_fai, []) - dysgu_vcf = DYSGU.out.vcf // Only dysgu SV should get annotated // add variantcaller to meta map - vcf = dysgu_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'dysgu' ], vcf ] } + vcf = dysgu_vcf.map { meta, vcf -> [ meta + [ variantcaller:'dysgu' ], vcf ] } versions = versions.mix(DYSGU.out.versions) From 99412bf76545c30e72afdffb23622ca471e818c3 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Thu, 11 Jul 2024 16:23:59 +0200 Subject: [PATCH 06/18] schema_update --- nextflow_schema.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7545b9930e..1b4df230f6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -356,7 +356,7 @@ }, "cf_ploidy": { "type": "string", - "default": "2", + "default": 2, "fa_icon": "fas fa-bacon", "help_text": "In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs Example: ploidy=2 , ploidy=2,3,4. For more details, see the [manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html).", "description": "Genome ploidy used by ControlFREEC", @@ -1061,7 +1061,8 @@ "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).", + "default": true }, "hook_url": { "type": "string", From 67354fd912742403916dd999d6db59f8ed2d88eb Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Thu, 11 Jul 2024 16:26:23 +0200 Subject: [PATCH 07/18] update_main.nf --- subworkflows/local/bam_variant_calling_dysgu/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf index d783e4102e..661e3d057a 100644 --- a/subworkflows/local/bam_variant_calling_dysgu/main.nf +++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf @@ -7,7 +7,7 @@ include { DYSGU } from '../../../modules/nf-core/dysgu/main' // Seems to be the consensus on upstream modules implementation too -workflow BAM_VARIANT_CALLING_GERMLINE_DYSGU { +workflow BAM_VARIANT_CALLING_DYSGU { take: cram // channel: [mandatory] [ meta, cram, crai ] fasta // channel: [mandatory] [ meta, fasta ] From 21f316fe83e3fe46b264e1d13e36ebb92807ae3d Mon Sep 17 00:00:00 2001 From: poddarharsh15 <45700858+poddarharsh15@users.noreply.github.com> Date: Thu, 11 Jul 2024 17:06:12 +0200 Subject: [PATCH 08/18] Update dysgu.config Co-authored-by: Maxime U Garcia --- conf/modules/dysgu.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules/dysgu.config b/conf/modules/dysgu.config index 8af87c5a70..9aa44e572c 100644 --- a/conf/modules/dysgu.config +++ b/conf/modules/dysgu.config @@ -15,7 +15,7 @@ process { if (params.tools && params.tools.split(',').contains('dysgu')) { - withName: 'DYSGU_GERMLINE' { + withName: 'DYSGU' { ext.args = { params.wes ? 
"--exome" : '' } ext.prefix = { "${meta.id}.dysgu" } publishDir = [ From 98f70ca676d6d46a740b519d294b1af5dbcae6b6 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Fri, 12 Jul 2024 11:18:52 +0200 Subject: [PATCH 09/18] tool_dysgu_add --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1b4df230f6..6d34a46f3b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -112,7 +112,7 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": 
"^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? Date: Fri, 12 Jul 2024 11:57:13 +0200 Subject: [PATCH 10/18] help_text_dysguaddded_schema --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6d34a46f3b..2f28fe7bef 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -111,7 +111,7 @@ "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", - "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", + "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can 
currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: DYSGU, Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT \n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|dysgu|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? 
Date: Fri, 12 Jul 2024 13:53:46 +0200 Subject: [PATCH 11/18] docs_updated_dysgu --- docs/output.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/output.md b/docs/output.md index 7f8455f95d..45562b60c1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -45,6 +45,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Structural Variants](#structural-variants) - [Manta](#manta) - [TIDDIT](#tiddit) + - [DYSGU](#dysgu) - [Sample heterogeneity, ploidy and CNVs](#sample-heterogeneity-ploidy-and-cnvs) - [ASCAT](#ascat) - [CNVKit](#cnvkit) @@ -639,6 +640,18 @@ It is optimized for analysis of germline variation in small sets of individuals +#### DYSGU + +[DYSGU](https://github.com/kcleal/dysgu) (pronounced duss-key) is a set of command-line tools and a Python API for calling structural variants using paired-end or long-read sequencing data. For further reading and documentation, see the [DYSGU manual](https://github.com/kcleal/dysgu/blob/master/README.rst). + +
+Output files for normal samples + +**Output directory: `{outdir}/variant_calling/dysgu/<sample>/`** + +- `<sample>.dysgu.vcf.gz` and `<sample>.dysgu.vcf.gz.tbi` + - VCF with tabix index containing SV calls + ### Sample heterogeneity, ploidy and CNVs #### ASCAT From 787728844eb6320c7eacdf20ec02a73d56f3a530 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Mon, 9 Sep 2024 10:04:35 +0200 Subject: [PATCH 12/18] interval_added --- subworkflows/local/bam_variant_calling_germline_all/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index fa2e44c571..b03fa3dd12 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -198,7 +198,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { BAM_VARIANT_CALLING_DYSGU ( cram, fasta, - fasta_fai + fasta_fai, + intervals ) vcf_dysgu = BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.vcf From cf40cb450fc1fa06e945af9081c32e675dcbd742 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 10 Sep 2024 07:50:14 +0000 Subject: [PATCH 13/18] Simplify the config of DYSGU and include from nextflow.config --- conf/modules/dysgu.config | 18 ++++++++---------- nextflow.config | 1 + 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/conf/modules/dysgu.config b/conf/modules/dysgu.config index 9aa44e572c..f637ae4cc3 100644 --- a/conf/modules/dysgu.config +++ b/conf/modules/dysgu.config @@ -14,15 +14,13 @@ // DYSGU process { - if (params.tools && params.tools.split(',').contains('dysgu')) { - withName: 'DYSGU' { - ext.args = { params.wes ? "--exome" : '' } - ext.prefix = { "${meta.id}.dysgu" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/dysgu/${meta.id}" }, - pattern: "*{dysgu}.{vcf.gz,vcf.gz.tbi}" - ] - } + withName: 'DYSGU' { + ext.args = { params.wes ? 
"--exome" : '' } + ext.prefix = { "${meta.id}.dysgu" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/dysgu/${meta.id}" }, + pattern: "*.{vcf.gz,vcf.gz.tbi}" + ] } } diff --git a/nextflow.config b/nextflow.config index 84c8a75297..6c92634357 100644 --- a/nextflow.config +++ b/nextflow.config @@ -428,6 +428,7 @@ includeConfig 'conf/modules/ascat.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/controlfreec.config' includeConfig 'conf/modules/deepvariant.config' +includeConfig 'conf/modules/dysgu.config' includeConfig 'conf/modules/freebayes.config' includeConfig 'conf/modules/haplotypecaller.config' includeConfig 'conf/modules/joint_germline.config' From c49561ee4eaf63cf037e50abd4a3595fbc5e2c45 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 10 Sep 2024 07:51:31 +0000 Subject: [PATCH 14/18] Using separate input channels for fasta and fasta_fai in DYSGU module --- modules/nf-core/dysgu/main.nf | 3 ++- subworkflows/local/bam_variant_calling_dysgu/main.nf | 12 +----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/modules/nf-core/dysgu/main.nf b/modules/nf-core/dysgu/main.nf index 6aa1deea95..17c4d381c3 100644 --- a/modules/nf-core/dysgu/main.nf +++ b/modules/nf-core/dysgu/main.nf @@ -9,7 +9,8 @@ process DYSGU { input: tuple val(meta), path(input), path(index) - tuple val(meta2), path(fasta), path(fai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path('*.vcf.gz') , emit: vcf diff --git a/subworkflows/local/bam_variant_calling_dysgu/main.nf b/subworkflows/local/bam_variant_calling_dysgu/main.nf index 661e3d057a..68a186c41e 100644 --- a/subworkflows/local/bam_variant_calling_dysgu/main.nf +++ b/subworkflows/local/bam_variant_calling_dysgu/main.nf @@ -12,20 +12,11 @@ workflow BAM_VARIANT_CALLING_DYSGU { cram // channel: [mandatory] [ meta, cram, crai ] fasta // channel: [mandatory] [ meta, fasta ] fasta_fai // channel: [mandatory] [ 
meta, fasta_fai ] - intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi] or [ [], []] if no intervals; intervals file contains all intervals main: versions = Channel.empty() - // Combine cram and intervals, account for 0 intervals - cram_intervals = cram.combine(intervals).map { it -> - bed_gz = it.size() > 3 ? it[3] : [] - bed_tbi = it.size() > 3 ? it[4] : [] - - [it[0], it[1], it[2], bed_gz, bed_tbi] - } - - DYSGU(cram_intervals, fasta, fasta_fai, []) + DYSGU(cram, fasta, fasta_fai) dysgu_vcf = DYSGU.out.vcf @@ -37,6 +28,5 @@ workflow BAM_VARIANT_CALLING_DYSGU { emit: vcf - versions } From bc3a3f99691e2c9b1769e236d5a07d228623d1e3 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 10 Sep 2024 07:52:56 +0000 Subject: [PATCH 15/18] Removing intervals from input to BAM_VARIANT_CALLING_DYSGU --- .../local/bam_variant_calling_germline_all/main.nf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index b03fa3dd12..3c47fe16ca 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -198,12 +198,11 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { BAM_VARIANT_CALLING_DYSGU ( cram, fasta, - fasta_fai, - intervals + fasta_fai ) - vcf_dysgu = BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.vcf - versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_DYSGU.out.versions) + vcf_dysgu = BAM_VARIANT_CALLING_DYSGU.out.vcf + versions = versions.mix(BAM_VARIANT_CALLING_DYSGU.out.versions) } // SENTIEON DNASCOPE From 4ec3ece9dd403a54cd1efa46f2738b8815658a83 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Tue, 10 Sep 2024 10:42:01 +0200 Subject: [PATCH 16/18] version_update_dv --- modules/nf-core/deepvariant/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/deepvariant/main.nf 
b/modules/nf-core/deepvariant/main.nf index 507b6c1174..e392fc2021 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -3,7 +3,7 @@ process DEEPVARIANT { label 'process_high' //Conda is not supported at the moment - container "nf-core/deepvariant:1.5.0" + container "nf-core/deepvariant:1.6.1" input: tuple val(meta), path(input), path(index), path(intervals) @@ -38,8 +38,8 @@ process DEEPVARIANT { --output_gvcf=${prefix}.g.vcf.gz \\ ${args} \\ ${regions} \\ - --intermediate_results_dir=. \\ - --num_shards=${task.cpus} + --intermediate_results_dir=tmp \\ + --num_shards=1 cat <<-END_VERSIONS > versions.yml "${task.process}": From ed8f8bac34d028af2ab500400da3eb2fc303ae32 Mon Sep 17 00:00:00 2001 From: Harsh Poddar Date: Tue, 10 Sep 2024 10:42:39 +0200 Subject: [PATCH 17/18] removed_spaces --- modules/nf-core/dragmap/align/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/dragmap/align/main.nf b/modules/nf-core/dragmap/align/main.nf index 30e47992f3..7c831063c2 100644 --- a/modules/nf-core/dragmap/align/main.nf +++ b/modules/nf-core/dragmap/align/main.nf @@ -3,7 +3,7 @@ process DRAGMAP_ALIGN { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:df80ed8d23d0a2c43181a2b3dd1b39f2d00fab5c-0': 'biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:df80ed8d23d0a2c43181a2b3dd1b39f2d00fab5c-0' }" From c5e3d656254da43c51e3ae557c190970553958fb Mon Sep 17 00:00:00 2001 From: poddarharsh15 <45700858+poddarharsh15@users.noreply.github.com> Date: Tue, 10 Sep 2024 13:01:38 +0200 Subject: [PATCH 18/18] Update main.nf --- modules/nf-core/deepvariant/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index e392fc2021..ed95f066be 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -3,7 +3,7 @@ process DEEPVARIANT { label 'process_high' //Conda is not supported at the moment - container "nf-core/deepvariant:1.6.1" + container "nf-core/deepvariant:1.5.0" input: tuple val(meta), path(input), path(index), path(intervals)