From f500911969abfa36907ab2f60dbe5664b5527a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Wed, 4 Dec 2024 11:38:06 +0100 Subject: [PATCH 01/18] feat: install muse --- modules.json | 10 +++ modules/nf-core/muse/call/environment.yml | 5 ++ modules/nf-core/muse/call/main.nf | 50 ++++++++++++ modules/nf-core/muse/call/meta.yml | 72 +++++++++++++++++ modules/nf-core/muse/call/tests/main.nf.test | 72 +++++++++++++++++ .../nf-core/muse/call/tests/main.nf.test.snap | 68 ++++++++++++++++ modules/nf-core/muse/call/tests/tags.yml | 2 + modules/nf-core/muse/sump/environment.yml | 6 ++ modules/nf-core/muse/sump/main.nf | 54 +++++++++++++ modules/nf-core/muse/sump/meta.yml | 67 ++++++++++++++++ modules/nf-core/muse/sump/tests/main.nf.test | 78 +++++++++++++++++++ .../nf-core/muse/sump/tests/main.nf.test.snap | 49 ++++++++++++ .../nf-core/muse/sump/tests/nextflow.config | 12 +++ modules/nf-core/muse/sump/tests/tags.yml | 2 + 14 files changed, 547 insertions(+) create mode 100644 modules/nf-core/muse/call/environment.yml create mode 100644 modules/nf-core/muse/call/main.nf create mode 100644 modules/nf-core/muse/call/meta.yml create mode 100644 modules/nf-core/muse/call/tests/main.nf.test create mode 100644 modules/nf-core/muse/call/tests/main.nf.test.snap create mode 100644 modules/nf-core/muse/call/tests/tags.yml create mode 100644 modules/nf-core/muse/sump/environment.yml create mode 100644 modules/nf-core/muse/sump/main.nf create mode 100644 modules/nf-core/muse/sump/meta.yml create mode 100644 modules/nf-core/muse/sump/tests/main.nf.test create mode 100644 modules/nf-core/muse/sump/tests/main.nf.test.snap create mode 100644 modules/nf-core/muse/sump/tests/nextflow.config create mode 100644 modules/nf-core/muse/sump/tests/tags.yml diff --git a/modules.json b/modules.json index 75fec0dbc7..b774b62291 100644 --- a/modules.json +++ b/modules.json @@ -349,6 +349,16 @@ "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, + 
"muse/call": { + "branch": "master", + "git_sha": "219299d7fc22a55cd3a07c2122044e7ca24b815f", + "installed_by": ["modules"] + }, + "muse/sump": { + "branch": "master", + "git_sha": "8e17f287034692420127d807c7631ae5ca4b9d0c", + "installed_by": ["modules"] + }, "ngscheckmate/ncm": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/muse/call/environment.yml b/modules/nf-core/muse/call/environment.yml new file mode 100644 index 0000000000..5bc34c1036 --- /dev/null +++ b/modules/nf-core/muse/call/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::muse=2.1.2" diff --git a/modules/nf-core/muse/call/main.nf b/modules/nf-core/muse/call/main.nf new file mode 100644 index 0000000000..b4559761ab --- /dev/null +++ b/modules/nf-core/muse/call/main.nf @@ -0,0 +1,50 @@ +process MUSE_CALL { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9f/9f0ebb574ef5eed2a6e034f1b2feea6c252d1ab0c8bc5135a669059aa1f4d2ca/data': + 'community.wave.seqera.io/library/muse:6637291dcbb0bdb8' }" + + input: + tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai) + tuple val(meta2), path(reference) + + output: + tuple val(meta), path("*.MuSE.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + MuSE \\ + call \\ + $args \\ + -f $reference \\ + -O ${prefix} \\ + -n $task.cpus \\ + $tumor_bam \\ + $normal_bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.MuSE.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/muse/call/meta.yml b/modules/nf-core/muse/call/meta.yml new file mode 100644 index 0000000000..6733677dbd --- /dev/null +++ b/modules/nf-core/muse/call/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "muse_call" +description: pre-filtering and calculating position-specific summary statistics using + the Markov substitution model +keywords: + - variant calling + - somatic + - wgs + - wxs + - vcf +tools: + - "MuSE": + description: "Somatic point mutation caller based on Markov substitution model + for molecular evolution" + homepage: "https://bioinformatics.mdanderson.org/public-software/muse/" + documentation: "https://github.com/wwylab/MuSE" + tool_dev_url: "https://github.com/wwylab/MuSE" + doi: "10.1101/gr.278456.123" + licence: 
["https://github.com/danielfan/MuSE/blob/master/LICENSE"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - tumor_bam: + type: file + description: Sorted tumor BAM file + pattern: "*.bam" + - tumor_bai: + type: file + description: Index file for the tumor BAM file + pattern: "*.bai" + - normal_bam: + type: file + description: Sorted matched normal BAM file + pattern: "*.bam" + - normal_bai: + type: file + description: Index file for the normal BAM file + pattern: "*.bai" + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. `[ id:'test' ]` + - reference: + type: file + description: reference genome file + pattern: ".fasta" +output: + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.MuSE.txt": + type: file + description: position-specific summary statistics + pattern: "*.MuSE.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/muse/call/tests/main.nf.test b/modules/nf-core/muse/call/tests/main.nf.test new file mode 100644 index 0000000000..b5e441ec32 --- /dev/null +++ b/modules/nf-core/muse/call/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process MUSE_CALL" + script "../main.nf" + process "MUSE_CALL" + + tag "modules" + tag "modules_nfcore" + tag "muse" + tag "muse/call" + + test("human - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("human - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/muse/call/tests/main.nf.test.snap b/modules/nf-core/muse/call/tests/main.nf.test.snap new file mode 100644 index 0000000000..ead8906a9c --- /dev/null +++ b/modules/nf-core/muse/call/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "human - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.MuSE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-29T14:30:48.292828" + }, + "human - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,3a38ee9131a217cc56199bd4a6b18e1d" + ] + ], + "1": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,3a38ee9131a217cc56199bd4a6b18e1d" + ] + ], + "versions": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-29T14:30:32.522553" + } +} \ No newline at end of file diff --git a/modules/nf-core/muse/call/tests/tags.yml b/modules/nf-core/muse/call/tests/tags.yml new file mode 100644 index 0000000000..4e04a91a4a --- /dev/null +++ b/modules/nf-core/muse/call/tests/tags.yml @@ -0,0 +1,2 @@ +muse/call: + - "modules/nf-core/muse/call/**" diff --git a/modules/nf-core/muse/sump/environment.yml b/modules/nf-core/muse/sump/environment.yml new file mode 100644 index 0000000000..4c481284ea --- /dev/null +++ b/modules/nf-core/muse/sump/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::muse=2.1.2 + - bioconda::tabix=1.11 # needed for bgzip diff --git a/modules/nf-core/muse/sump/main.nf b/modules/nf-core/muse/sump/main.nf new file mode 100644 index 0000000000..1980003b84 --- /dev/null +++ b/modules/nf-core/muse/sump/main.nf @@ -0,0 +1,54 @@ +process MUSE_SUMP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/35/3567f6162ff718c648175c5e7b5f848eaa27811d0cb3ad53def8f0a1c8893efa/data': + 'community.wave.seqera.io/library/muse_tabix:df58ca78bd9447b7' }" + + input: + tuple val(meta), path(muse_call_txt) + tuple val(meta2), path(ref_vcf), path(ref_vcf_tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // -G for WGS data and -E for WES data + def args2 = task.ext.args2 ?: '' // args for bgzip + def prefix = task.ext.prefix ?: "${meta.id}" + """ + MuSE \\ + sump \\ + $args \\ + -I $muse_call_txt \\ + -n $task.cpus \\ + -D $ref_vcf \\ + -O ${prefix}.vcf + + bgzip $args2 --threads $task.cpus ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + bgzip: \$( bgzip --version | sed -e "s/bgzip (htslib) //g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + bgzip: \$( bgzip --version | sed -e "s/bgzip (htslib) //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/muse/sump/meta.yml b/modules/nf-core/muse/sump/meta.yml new file mode 100644 index 0000000000..9e938e1114 --- /dev/null +++ b/modules/nf-core/muse/sump/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "muse_sump" +description: Computes tier-based cutoffs from a sample-specific error model which + is generated by muse/call and reports the finalized variants +keywords: + - variant calling + - somatic + - wgs + - wxs + - vcf +tools: + - "MuSE": + description: "Somatic point mutation caller based on Markov substitution model + for molecular 
evolution" + homepage: "https://bioinformatics.mdanderson.org/public-software/muse/" + documentation: "https://github.com/wwylab/MuSE" + tool_dev_url: "https://github.com/wwylab/MuSE" + doi: "10.1101/gr.278456.123" + licence: ["https://github.com/danielfan/MuSE/blob/master/LICENSE"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - muse_call_txt: + type: file + description: single input file generated by 'MuSE call' + pattern: "*.MuSE.txt" + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. `[ id:'test' ]` + - ref_vcf: + type: file + description: | + dbSNP vcf file that should be bgzip compressed, tabix indexed and + based on the same reference genome used in 'MuSE call' + pattern: ".vcf.gz" + - ref_vcf_tbi: + type: file + description: Tabix index for the dbSNP vcf file + pattern: ".vcf.gz.tbi" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + pattern: "*.vcf" + - "*.vcf.gz": + type: map + description: bgzipped vcf file with called variants + pattern: "*.vcf" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/muse/sump/tests/main.nf.test b/modules/nf-core/muse/sump/tests/main.nf.test new file mode 100644 index 0000000000..47640d3bd8 --- /dev/null +++ b/modules/nf-core/muse/sump/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process MUSE_SUMP" + script "../main.nf" + process "MUSE_SUMP" + + tag "modules" + tag "modules_nfcore" + tag "muse" + tag "muse/sump" + + test("human - txt") { + + config "./nextflow.config" + + when { + params { + module_args = '-E' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/muse/MuSE-call.chr21.hg38.paired_end.recal.MuSE.txt', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + path(process.out.vcf.get(0).get(1)).vcf.header.getColumnCount(), + path(process.out.vcf.get(0).get(1)).vcf.summary + ).match() + } + ) + } + + } + + test("human - txt - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/muse/MuSE-call.chr21.hg38.paired_end.recal.MuSE.txt', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/muse/sump/tests/main.nf.test.snap b/modules/nf-core/muse/sump/tests/main.nf.test.snap new file mode 100644 index 0000000000..4da4356148 --- /dev/null +++ b/modules/nf-core/muse/sump/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "human - txt - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,4d667cb8f2f96c5705b1e44affdd7330" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,4d667cb8f2f96c5705b1e44affdd7330" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-29T14:52:29.011666" + }, + "human - txt": { + "content": [ + [ + "versions.yml:md5,4d667cb8f2f96c5705b1e44affdd7330" + ], + 11, + "VcfFile [chromosomes=[], sampleCount=2, variantCount=0, phased=true, phasedAutodetect=true]" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-02T16:29:22.506313" + } +} \ No newline at end of file diff --git a/modules/nf-core/muse/sump/tests/nextflow.config b/modules/nf-core/muse/sump/tests/nextflow.config new file mode 100644 index 0000000000..6d29ad9187 --- /dev/null +++ b/modules/nf-core/muse/sump/tests/nextflow.config @@ -0,0 +1,12 @@ +process { + + withName: 'MUSE_SUMP' { + ext.args = params.module_args + stageInMode = 'copy' + } + + withName: 'MUSE_SUMP_WGS' { + ext.args = '-G' + } + +} diff --git a/modules/nf-core/muse/sump/tests/tags.yml 
b/modules/nf-core/muse/sump/tests/tags.yml new file mode 100644 index 0000000000..6c833ca13c --- /dev/null +++ b/modules/nf-core/muse/sump/tests/tags.yml @@ -0,0 +1,2 @@ +muse/sump: + - "modules/nf-core/muse/sump/**" From b5421acaa33b74e13acb3f223829dc147560e016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Wed, 4 Dec 2024 11:38:21 +0100 Subject: [PATCH 02/18] feat: add muse variant calling sbwf --- .../bam_variant_calling_somatic_all/main.nf | 19 ++++++++- .../bam_variant_calling_somatic_muse/main.nf | 40 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 subworkflows/local/bam_variant_calling_somatic_muse/main.nf diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index cdfabfc3ac..bc60bcb4e6 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -10,6 +10,7 @@ include { BAM_VARIANT_CALLING_SOMATIC_ASCAT } from '../bam_variant_c include { BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC } from '../bam_variant_calling_somatic_controlfreec/main' include { BAM_VARIANT_CALLING_SOMATIC_MANTA } from '../bam_variant_calling_somatic_manta/main' include { BAM_VARIANT_CALLING_SOMATIC_MUTECT2 } from '../bam_variant_calling_somatic_mutect2/main' +include { BAM_VARIANT_CALLING_SOMATIC_MUSE } from '../bam_variant_calling_somatic_muse/main' include { BAM_VARIANT_CALLING_SOMATIC_STRELKA } from '../bam_variant_calling_somatic_strelka/main' include { BAM_VARIANT_CALLING_SOMATIC_TIDDIT } from '../bam_variant_calling_somatic_tiddit/main' include { MSISENSORPRO_MSISOMATIC } from '../../../modules/nf-core/msisensorpro/msisomatic/main' @@ -53,6 +54,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { out_msisensorpro = Channel.empty() vcf_mutect2 = Channel.empty() vcf_tiddit = Channel.empty() + vcf_muse = Channel.empty() if (tools.split(',').contains('ascat')) { 
BAM_VARIANT_CALLING_SOMATIC_ASCAT( @@ -209,6 +211,19 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.versions) } + // MuSE + if (tools.split(',').contains('muse')) { + BAM_VARIANT_CALLING_SOMATIC_MUSE( + cram, + fasta, + dbsnp, + dbsnp_tbi, + ) + + vcf_muse = BAM_VARIANT_CALLING_SOMATIC_MUSE.out.vcf + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUSE.out.versions) + } + // TIDDIT if (tools.split(',').contains('tiddit')) { BAM_VARIANT_CALLING_SOMATIC_TIDDIT( @@ -228,7 +243,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { vcf_manta, vcf_mutect2, vcf_strelka, - vcf_tiddit + vcf_tiddit, + vcf_muse ) emit: @@ -239,6 +255,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { vcf_mutect2 vcf_strelka vcf_tiddit + vcf_muse versions } diff --git a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf new file mode 100644 index 0000000000..c42b46cf41 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf @@ -0,0 +1,40 @@ +// +// MuSE tumor-normal variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { MUSE_CALL } from '../../../modules/nf-core/muse/call/main' +include { MUSE_SUMP } from '../../../modules/nf-core/muse/sump/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { + take: + cram // channel: [mandatory] [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai ] + fasta // channel: [mandatory] [ meta, fasta ] + dbsnp // channel: [optional] [ dbsnp ] + dbsnp_tbi // channel: [optional] [ dbsnp_tbi ] + + main: + versions = Channel.empty() + + MUSE_CALL( + cram, + fasta + ) + + MUSE_SUMP( + MUSE_CALL.out.txt, + dbsnp.map{ it -> [ [ id:it.baseName ], it, dbsnp_tbi ] } + ) + + // Mix intervals and no_intervals channels together + vcf = Channel.empty().mix(MUSE_SUMP.out.vcf) + .map{ meta, vcf -> [ meta + [ 
variantcaller: 'muse' ], vcf ] } + + versions = versions.mix(MUSE_CALL.out.versions) + versions = versions.mix(MUSE_SUMP.out.versions) + + emit: + vcf + versions +} From d1940b245ac6738a06d8646c266dc1dd0e2d2175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Wed, 4 Dec 2024 11:38:38 +0100 Subject: [PATCH 03/18] feat: add muse --- conf/modules/muse.config | 22 ++++++++++++++++++++++ conf/test_full.config | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 conf/modules/muse.config diff --git a/conf/modules/muse.config b/conf/modules/muse.config new file mode 100644 index 0000000000..f14cf04c60 --- /dev/null +++ b/conf/modules/muse.config @@ -0,0 +1,22 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MUSE + +process { + + withName: 'MUSE_SUMP' { + ext.args = { params.wes ? 
'-E' : '-G' } + } + +} diff --git a/conf/test_full.config b/conf/test_full.config index 1ba5ad2c78..8febf40be4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,7 +18,7 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/HCC1395_WXS_somatic_full_test.csv' // Other params - tools = 'ngscheckmate,strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep,snpeff' + tools = 'ngscheckmate,strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep,snpeff,muse' split_fastq = 20000000 intervals = 's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed' wes = true From 66bc7861dfbdd7ae0fdef3b56f81a323898bdca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Wed, 4 Dec 2024 12:32:54 +0100 Subject: [PATCH 04/18] feat: add muse to schema --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2e66ccdf53..940b961742 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -112,7 +112,7 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Lofreq, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka\n- Structural variants: Manta, TIDDIT\n- 
Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|lofreq|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? Date: Wed, 4 Dec 2024 16:06:02 +0100 Subject: [PATCH 05/18] feat: convert to bam --- .../bam_variant_calling_somatic_muse/main.nf | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf index c42b46cf41..02bb0fdb32 100644 --- a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf @@ -6,19 +6,53 @@ include { MUSE_CALL } from '../../../modules/nf-core/muse/call/main' include { MUSE_SUMP } from '../../../modules/nf-core/muse/sump/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_TUMOR } from '../../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_NORMAL } from '../../../modules/nf-core/samtools/convert/main' workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { take: cram // channel: [mandatory] [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai ] fasta // channel: [mandatory] [ meta, fasta ] + fai // channel: [mandatory] [ meta, fai ] dbsnp // channel: [optional] [ dbsnp ] dbsnp_tbi // channel: [optional] [ dbsnp_tbi ] main: versions = Channel.empty() + CRAM_TO_BAM_TUMOR( + cram.map{ meta, normal_cram, normal_crai,tumor_cram, 
tumor_crai -> [ meta + [ tobam: 'tumor' ], tumor_cram, tumor_crai ] }, + fasta, + fai + ) + + CRAM_TO_BAM_NORMAL( + cram.map{ meta, normal_cram, normal_crai,tumor_cram, tumor_crai -> [ meta + [ tobam: 'normal' ], normal_cram, normal_crai ] }, + fasta, + fai + ) + + ch_normal_bam = CRAM_TO_BAM_NORMAL.out.bam + ch_normal_bai = CRAM_TO_BAM_NORMAL.out.bai + ch_tumor_bam = CRAM_TO_BAM_TUMOR.out.bam + ch_tumor_bai = CRAM_TO_BAM_TUMOR.out.bai + + // Combine normal BAM and BAI + ch_normal = ch_normal_bam.join(ch_normal_bai, by: [0]) // Join by meta + + // Combine tumor BAM and BAI + ch_tumor = ch_tumor_bam.join(ch_tumor_bai, by: [0]) // Join by meta + + // Combine normal and tumor data + ch_combined = ch_normal.join(ch_tumor, by: [0]) // Join by meta + + // Rearrange the elements to match the desired output + ch_bam = ch_combined.map { meta, normal_bam, normal_bai, tumor_bam, tumor_bai -> + [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + } + MUSE_CALL( - cram, + ch_bam, fasta ) @@ -31,6 +65,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { vcf = Channel.empty().mix(MUSE_SUMP.out.vcf) .map{ meta, vcf -> [ meta + [ variantcaller: 'muse' ], vcf ] } + versions = versions.mix(CRAM_TO_BAM_NORMAL.out.versions) + versions = versions.mix(CRAM_TO_BAM_TUMOR.out.versions) versions = versions.mix(MUSE_CALL.out.versions) versions = versions.mix(MUSE_SUMP.out.versions) From 2c1b86aaa027c766dab4fa592ff5d4eb8978c25e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Wed, 4 Dec 2024 16:11:04 +0100 Subject: [PATCH 06/18] feat: add fai as input --- subworkflows/local/bam_variant_calling_somatic_all/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index bc60bcb4e6..1766e8d3bb 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -216,6 +216,7 @@ workflow 
BAM_VARIANT_CALLING_SOMATIC_ALL { BAM_VARIANT_CALLING_SOMATIC_MUSE( cram, fasta, + fasta_fai, dbsnp, dbsnp_tbi, ) From d40410532a8a3aeff840e3b221786c614e91f5c3 Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 10:38:47 +0100 Subject: [PATCH 07/18] add conf --- conf/modules/muse.config | 6 ++++++ nextflow.config | 1 + .../bam_variant_calling_somatic_muse/main.nf | 16 +++++++++------- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/conf/modules/muse.config b/conf/modules/muse.config index f14cf04c60..a4423b3daa 100644 --- a/conf/modules/muse.config +++ b/conf/modules/muse.config @@ -17,6 +17,12 @@ process { withName: 'MUSE_SUMP' { ext.args = { params.wes ? '-E' : '-G' } + ext.when = { params.tools && params.tools.contains('muse') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/muse/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + ] } } diff --git a/nextflow.config b/nextflow.config index f01ed136d2..712167677e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -465,6 +465,7 @@ includeConfig 'conf/modules/manta.config' includeConfig 'conf/modules/mpileup.config' includeConfig 'conf/modules/msisensorpro.config' includeConfig 'conf/modules/mutect2.config' +includeConfig 'conf/modules/muse.config' includeConfig 'conf/modules/sentieon_dnascope.config' includeConfig 'conf/modules/sentieon_dnascope_joint_germline.config' includeConfig 'conf/modules/sentieon_haplotyper.config' diff --git a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf index 02bb0fdb32..093647265f 100644 --- a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf @@ -4,9 +4,9 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { MUSE_CALL } from '../../../modules/nf-core/muse/call/main' -include 
{ MUSE_SUMP } from '../../../modules/nf-core/muse/sump/main' -include { SAMTOOLS_CONVERT as CRAM_TO_BAM_TUMOR } from '../../../modules/nf-core/samtools/convert/main' +include { MUSE_CALL } from '../../../modules/nf-core/muse/call/main' +include { MUSE_SUMP } from '../../../modules/nf-core/muse/sump/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_TUMOR } from '../../../modules/nf-core/samtools/convert/main' include { SAMTOOLS_CONVERT as CRAM_TO_BAM_NORMAL } from '../../../modules/nf-core/samtools/convert/main' workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { @@ -14,20 +14,22 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { cram // channel: [mandatory] [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai ] fasta // channel: [mandatory] [ meta, fasta ] fai // channel: [mandatory] [ meta, fai ] - dbsnp // channel: [optional] [ dbsnp ] - dbsnp_tbi // channel: [optional] [ dbsnp_tbi ] + dbsnp // channel: [optional] [ dbsnp ] + dbsnp_tbi // channel: [optional] [ dbsnp_tbi ] main: versions = Channel.empty() CRAM_TO_BAM_TUMOR( - cram.map{ meta, normal_cram, normal_crai,tumor_cram, tumor_crai -> [ meta + [ tobam: 'tumor' ], tumor_cram, tumor_crai ] }, + cram.map{ meta, normal_cram, normal_crai,tumor_cram, tumor_crai -> + [ meta + [ tobam: 'tumor' ], tumor_cram, tumor_crai ] }, fasta, fai ) CRAM_TO_BAM_NORMAL( - cram.map{ meta, normal_cram, normal_crai,tumor_cram, tumor_crai -> [ meta + [ tobam: 'normal' ], normal_cram, normal_crai ] }, + cram.map{ meta, normal_cram, normal_crai,tumor_cram, tumor_crai -> + [ meta + [ tobam: 'normal' ], normal_cram, normal_crai ] }, fasta, fai ) From 23cc48150cafcf81a96071cfc17136e910305f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20B=C3=A4uerle?= <45968370+famosab@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:39:12 +0100 Subject: [PATCH 08/18] Update nextflow_schema.json Co-authored-by: Friederike Hanssen --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json 
b/nextflow_schema.json index 940b961742..61961a3bab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -327,7 +327,7 @@ }, "cf_ploidy": { "type": "string", - "default": "2", + "default": 2, "fa_icon": "fas fa-bacon", "help_text": "In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs Example: ploidy=2 , ploidy=2,3,4. For more details, see the [manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html).", "description": "Genome ploidy used by ControlFREEC" From 8c722e6af0a7529b2c2b16183c1c3f37b525351a Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 15:34:39 +0100 Subject: [PATCH 09/18] fix: swap to cram_tumor and cram_normal --- conf/modules/muse.config | 8 +++++++ .../bam_variant_calling_somatic_all/main.nf | 9 ++++--- .../bam_variant_calling_somatic_muse/main.nf | 24 +++++++------------ 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/conf/modules/muse.config b/conf/modules/muse.config index a4423b3daa..573227b7b4 100644 --- a/conf/modules/muse.config +++ b/conf/modules/muse.config @@ -25,4 +25,12 @@ process { ] } + withName: 'CRAM_TO_BAM_TUMOR' { + ext.prefix = {"${meta.tumor_id}"} + } + + withName: 'CRAM_TO_BAM_NORMAL' { + ext.prefix = {"${meta.normal_id}"} + } + } diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index 1766e8d3bb..f0138c67c5 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -213,15 +213,18 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { // MuSE if (tools.split(',').contains('muse')) { + cram_normal = cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai ] } + cram_tumor = cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, tumor_cram, tumor_crai ] } BAM_VARIANT_CALLING_SOMATIC_MUSE( - cram, + cram_normal, 
+ cram_tumor, fasta, fasta_fai, dbsnp, - dbsnp_tbi, + dbsnp_tbi ) - vcf_muse = BAM_VARIANT_CALLING_SOMATIC_MUSE.out.vcf + vcf_muse = BAM_VARIANT_CALLING_SOMATIC_MUSE.out.vcf versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUSE.out.versions) } diff --git a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf index 093647265f..466295c4d0 100644 --- a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf @@ -11,7 +11,8 @@ include { SAMTOOLS_CONVERT as CRAM_TO_BAM_NORMAL } from '../../../modules/nf-cor workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { take: - cram // channel: [mandatory] [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai ] + cram_normal // channel: [mandatory] [ meta, normal_cram, normal_crai] + cram_tumor // channel: [mandatory] [ meta, tumor_cram, tumor_crai] fasta // channel: [mandatory] [ meta, fasta ] fai // channel: [mandatory] [ meta, fai ] dbsnp // channel: [optional] [ dbsnp ] @@ -19,17 +20,16 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { main: versions = Channel.empty() + ch_dbsnp = dbsnp.combine(dbsnp_tbi) CRAM_TO_BAM_TUMOR( - cram.map{ meta, normal_cram, normal_crai,tumor_cram, tumor_crai -> - [ meta + [ tobam: 'tumor' ], tumor_cram, tumor_crai ] }, + cram_tumor, fasta, fai ) CRAM_TO_BAM_NORMAL( - cram.map{ meta, normal_cram, normal_crai,tumor_cram, tumor_crai -> - [ meta + [ tobam: 'normal' ], normal_cram, normal_crai ] }, + cram_normal, fasta, fai ) @@ -46,12 +46,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { ch_tumor = ch_tumor_bam.join(ch_tumor_bai, by: [0]) // Join by meta // Combine normal and tumor data - ch_combined = ch_normal.join(ch_tumor, by: [0]) // Join by meta - - // Rearrange the elements to match the desired output - ch_bam = ch_combined.map { meta, normal_bam, normal_bai, tumor_bam, tumor_bai -> - [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] - } + ch_bam = 
ch_tumor.join(ch_normal, by: [0]) // Join by meta MUSE_CALL( ch_bam, @@ -60,12 +55,11 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { MUSE_SUMP( MUSE_CALL.out.txt, - dbsnp.map{ it -> [ [ id:it.baseName ], it, dbsnp_tbi ] } + ch_dbsnp.map{ it -> [ [ id:it.baseName ], it[0], it[1] ] } ) - // Mix intervals and no_intervals channels together - vcf = Channel.empty().mix(MUSE_SUMP.out.vcf) - .map{ meta, vcf -> [ meta + [ variantcaller: 'muse' ], vcf ] } + // add variantcaller to meta map + vcf = MUSE_SUMP.out.vcf.map{ meta, vcf -> [ meta + [ variantcaller:'muse' ], vcf ] } versions = versions.mix(CRAM_TO_BAM_NORMAL.out.versions) versions = versions.mix(CRAM_TO_BAM_TUMOR.out.versions) From 7f7dfb68577508f628595793897eabf3891d10ba Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 15:48:21 +0100 Subject: [PATCH 10/18] feat: add to changelog and docs --- CHANGELOG.md | 1 + docs/usage.md | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0876f35e9d..acc3ccd431 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [1663](https://github.com/nf-core/sarek/pull/1663) - Massive conda modules update - [1664](https://github.com/nf-core/sarek/pull/1664) - Check if flowcell ID matches for read pair - [1730](https://github.com/nf-core/sarek/pull/1730) - Enable Harshil Alignment™️ in VS Code workspace settings +- [1744](https://github.com/nf-core/sarek/pull/1744) - Add MuSE as new somatic variant caller ### Changed diff --git a/docs/usage.md b/docs/usage.md index 2d442c7e20..03a83539db 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -590,6 +590,7 @@ This list is by no means exhaustive and it will depend on the specific analysis | [CNVKit](https://cnvkit.readthedocs.io/en/stable/) | x | x | - | x | x | x | | [Control-FREEC](https://github.com/BoevaLab/FREEC) | x | x | x | - | x | x | | [MSIsensorPro](https://github.com/xjtu-omics/msisensor-pro) | x | 
x | x | - | - | x | +| [MuSE](https://github.com/wwylab/MuSE) | x | x | - | - | - | x | ## How to run ASCAT with whole-exome sequencing data? From 4e8ce412f97fdb3fc428162a699d9b963d12a3f4 Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 15:50:30 +0100 Subject: [PATCH 11/18] feat: add to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 33892e0a6b..6c501e24c9 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ Depending on the options and samples provided, the pipeline can currently perfor - `Strelka2` - `TIDDIT` - `Lofreq` + - `MuSE` - Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`) - Summarise and represent QC (`MultiQC`) From 0747d4ca46f0a4d3dbafc5fcdde79fac4fed3db0 Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 15:57:18 +0100 Subject: [PATCH 12/18] feat: add muse to output --- docs/output.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/output.md b/docs/output.md index 6204ada6a5..008ed05047 100644 --- a/docs/output.md +++ b/docs/output.md @@ -44,6 +44,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Sentieon Haplotyper joint germline variant calling](#sentieon-haplotyper-joint-germline-variant-calling) - [Strelka](#strelka) - [Lofreq](#lofreq) + - [MuSE](#muse) - [Structural Variants](#structural-variants) - [Manta](#manta) - [TIDDIT](#tiddit) @@ -590,6 +591,20 @@ For further downstream analysis, take a look [here](https://github.com/Illumina/ +#### MuSE + +[MuSE](https://github.com/wwylab/MuSE) is an accurate and ultra-fast somatic mutation calling tool for whole-genome sequencing (WGS) and whole-exome sequencing (WES) data from heterogeneous tumor samples. This tool is unique in accounting for tumor heterogeneity using a sample-specific error model that improves sensitivity and specificity in mutation calling from sequencing data. 
For further reading see the [recently published paper](https://genome.cshlp.org/content/early/2024/05/03/gr.278456.123.long). + +
+Output files for tumor-normal samples + +**Output directory: `{outdir}/variant_calling/muse//`** + +-`_vs_.vcf.gz` +-VCF with called variants. Fields are named TUMOR and NORMAL. + +
+ ### Structural Variants #### Manta From e5e1358e23b8edc38f69fccab83e777a9869037a Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 15:57:45 +0100 Subject: [PATCH 13/18] feat: add muse to output --- docs/output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 008ed05047..048a25b868 100644 --- a/docs/output.md +++ b/docs/output.md @@ -598,9 +598,9 @@ For further downstream analysis, take a look [here](https://github.com/Illumina/
Output files for tumor-normal samples -**Output directory: `{outdir}/variant_calling/muse//`** +**Output directory: `{outdir}/variant_calling/muse//`** --`_vs_.vcf.gz` +-`.vcf.gz` -VCF with called variants. Fields are named TUMOR and NORMAL.
From 92673c6eaa3b0056412cd2310b4f26cf6bd047ee Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 16:03:19 +0100 Subject: [PATCH 14/18] fix: changelog and citations --- CHANGELOG.md | 1 + CITATIONS.md | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index acc3ccd431..bce2ba6729 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | `samtools` | 1.20 | 1.21 | | `sentieon` | 202308.02 | 202308.03 | | `svdb` | 2.8.1 | 2.8.2 | +| `MuSE` | | 2.1.2 | ### Parameters diff --git a/CITATIONS.md b/CITATIONS.md index 1c4a22cade..4396e8c9e2 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -150,6 +150,10 @@ > Wilm et al. LoFreq: A sequence-quality aware, ultra-sensitive variant caller for uncovering cell-population heterogeneity from high-throughput sequencing datasets. Nucleic Acids Res. 2012; 40(22):11189-201. +- [MuSE](https://pubmed.ncbi.nlm.nih.gov/38589250/) + + > Ji S, Zhu T, Sethia A, Wang W. Accelerated somatic mutation calling for whole-genome and whole-exome sequencing data from heterogenous tumor samples. Genome Res. 2024 May 15;34(4):633-641. doi: 10.1101/gr.278456.123. PMID: 38589250; PMCID: PMC11146589. + ## R packages - [R](https://www.R-project.org/) From 4df5c160ad509946a444af5718a3d1e9a5ee0c95 Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 16:06:23 +0100 Subject: [PATCH 15/18] fix: update prefix --- conf/modules/muse.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules/muse.config b/conf/modules/muse.config index 573227b7b4..f4928c72b5 100644 --- a/conf/modules/muse.config +++ b/conf/modules/muse.config @@ -18,6 +18,7 @@ process { withName: 'MUSE_SUMP' { ext.args = { params.wes ? 
'-E' : '-G' } ext.when = { params.tools && params.tools.contains('muse') } + ext.prefix = { "${meta.id}.muse" } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/muse/${meta.id}/" }, From 13a0521e25fe6b4d24f5696e9e67dc4aa27d4132 Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 16:43:17 +0100 Subject: [PATCH 16/18] feat: add test file for muse --- tests/test_muse.yml | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/test_muse.yml diff --git a/tests/test_muse.yml b/tests/test_muse.yml new file mode 100644 index 0000000000..2d4dd109a3 --- /dev/null +++ b/tests/test_muse.yml @@ -0,0 +1,39 @@ +- name: Run variant calling on somatic sample with MuSE + command: nextflow run main.nf -profile test,tools_somatic --tools muse --outdir results + tags: + - muse + - somatic + - variant_calling + files: + - path: results/cram/sample3.bam + md5sum: 8cfa7f35990240ab663a0776ae67ecdc + - path: results/cram/sample4.bam + md5sum: fe8ceee2ede1f9b0b4f2aaa3f1a97241 + - path: results/cram/sample3.bam.bai + md5sum: c55142e20838c3b56d93ff853a7b7189 + - path: results/cram/sample4.bam.bai + md5sum: c6d0e6be2e5d8a6bd312517d9ba0f4af + - path: results/csv/variantcalled.csv + md5sum: 4545be9d6d7b77ba3b3f0aae5b7ca3af + - path: results/multiqc + - path: results/muse/sample4_vs_sample3.MuSE.txt + md5sum: 432e9c18fb22c3bb4d857a397bab40fc + - path: results/reports/bcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.bcftools_stats.txt + md5sum: 0a4c6d5841a55b492a50917b0610a0b3 + - path: results/reports/samtools/sample3/sample3.recal.cram.stats + md5sum: bcc229318527e414e69aaa5cd092ad9b + - path: results/reports/samtools/sample4/sample4.recal.cram.stats + md5sum: 0d1784cb4c3f14b9858247ac6128dd03 + - path: results/variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.muse.vcf.gz + contains: + [ + '##INFO=' + ] + # conda changes md5sums for test + - path:
results/reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + # binary changes md5sums on reruns + - path: results/reports/mosdepth/sample3/ + - path: results/reports/mosdepth/sample4/ \ No newline at end of file From 03753509a1762713e44f3eb24408489d8726dd05 Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 16:45:29 +0100 Subject: [PATCH 17/18] fix: prettier --- tests/test_muse.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_muse.yml b/tests/test_muse.yml index 2d4dd109a3..c76a8d4547 100644 --- a/tests/test_muse.yml +++ b/tests/test_muse.yml @@ -25,10 +25,7 @@ - path: results/reports/samtools/sample4/sample4.recal.cram.stats md5sum: 0d1784cb4c3f14b9858247ac6128dd03 - path: results/variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.muse.vcf.gz - contains: - [ - '##INFO=' - ] + contains: ['##INFO='] # conda changes md5sums for test - path: results/reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 @@ -36,4 +33,4 @@ md5sum: bc68ae4e688e9fb772b457069e604883 # binary changes md5sums on reruns - path: results/reports/mosdepth/sample3/ - - path: results/reports/mosdepth/sample4/ \ No newline at end of file + - path: results/reports/mosdepth/sample4/ From 59dde7b4d29c7ba634c674187d788e355d9bc757 Mon Sep 17 00:00:00 2001 From: famosab Date: Thu, 5 Dec 2024 16:55:22 +0100 Subject: [PATCH 18/18] feat: add to pytest --- tests/config/pytesttags.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/config/pytesttags.yml b/tests/config/pytesttags.yml index 3f8723df7e..e86a1c9d7c 100644 --- a/tests/config/pytesttags.yml +++ b/tests/config/pytesttags.yml @@ -345,6 +345,16 @@ lofreq: - tests/csv/3.0/recalibrated_tumoronly.csv - 
tests/test_lofreq.yml +## MuSE +muse: + - conf/modules/muse.config + - modules/nf-core/samtools/convert/** + - modules/nf-core/muse/** + - subworkflows/local/bam_variant_calling_somatic_all/** + - subworkflows/local/bam_variant_calling_somatic_muse/** + - tests/csv/3.0/recalibrated_somatic.csv + - tests/test_muse.yml + ## tiddit tiddit: - conf/modules/tiddit.config