diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f32a3a0e5..a0d0ce3a32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [1638](https://github.com/nf-core/sarek/pull/1638) - Added additional documentation detailing ASCAT WES usage. - [1640](https://github.com/nf-core/sarek/pull/1620) - Add `lofreq` as a tumor-only variant caller - [1642](https://github.com/nf-core/sarek/pull/1642) - Back to dev +- [1646](https://github.com/nf-core/sarek/pull/1646) - Added asmultipcf functionality for multisample ASCAT calls. - [1653](https://github.com/nf-core/sarek/pull/1653) - Updates `sarek_subway` files with `lofreq` - [1660](https://github.com/nf-core/sarek/pull/1642) - Add `--length_required` for minimal reads length with `FASTP` - [1663](https://github.com/nf-core/sarek/pull/1663) - Massive conda modules update diff --git a/conf/test/tools_somatic_ascat_asmultipcf.config b/conf/test/tools_somatic_ascat_asmultipcf.config new file mode 100644 index 0000000000..ea3402f528 --- /dev/null +++ b/conf/test/tools_somatic_ascat_asmultipcf.config @@ -0,0 +1,24 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/sarek -profile test,<extra_test_profile>,<docker/singularity> --outdir <OUTDIR> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +params { + input = "${projectDir}/tests/csv/3.0/ascat_somatic_asmultipcf.csv" + genome = 'GATK.GRCh37' + germline_resource_tbi = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi" + ascat_loci = "G1000_loci_hg38.zip" + ascat_min_base_qual = 30 + chr_dir = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + germline_resource = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" + intervals = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + step = 'variant_calling' + tools = 'ascat' + wes = false +} diff --git a/docs/output.md b/docs/output.md index 6204ada6a5..7bfef2a187 100644 --- a/docs/output.md +++ b/docs/output.md @@ -712,6 +712,13 @@ The output is a tab delimited text file with the following columns: The file `.cnvs.txt` contains all segments predicted by ASCAT, both those with normal copy number (nMinor = 1 and nMajor =1) and those corresponding to copy number aberrations. +If `--asmultipcf` is turned on, the `asmultipcf` module is run as well; it corrects segment calls across multiple samples from the same patient.
This will give you two additional output files:
+
+- `<patient>_asmultipcf_purityploidy.txt`
+  - file with information about purity and ploidy corrected for multiple samples
+- `<patient>_asmultipcf_segments.txt`
+  - file with information about copy number segments corrected for multiple samples
+
 #### CNVKit
diff --git a/modules/local/asmultipcf/environment.yml b/modules/local/asmultipcf/environment.yml
new file mode 100644
index 0000000000..c436b22373
--- /dev/null
+++ b/modules/local/asmultipcf/environment.yml
@@ -0,0 +1,8 @@
+name: asmultipcf
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::ascat=3.1.1
+  - bioconda::cancerit-allelecount=4.3.0
diff --git a/modules/local/asmultipcf/main.nf b/modules/local/asmultipcf/main.nf
new file mode 100644
index 0000000000..b0e24d0e7b
--- /dev/null
+++ b/modules/local/asmultipcf/main.nf
@@ -0,0 +1,81 @@
+// Multi-sample ASCAT segmentation: merges the per-tumor LogR/BAF tables of one patient,
+// runs ascat.asmultipcf + ascat.runAscat and writes segment and purity/ploidy tables.
+process ASMULTIPCF {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:ba3e6d2157eac2d38d22e62ec87675e12adb1010-0':
+        'biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:ba3e6d2157eac2d38d22e62ec87675e12adb1010-0' }"
+
+    input:
+    tuple val(meta), path(tumor_logr_files), path(tumor_baf_files), path(normal_logr_file), path(normal_baf_file)
+
+    output:
+    tuple val(meta), path("*_asmultipcf_segments.txt"), emit: asmultipcf_segments
+    tuple val(meta), path("*_asmultipcf_purityploidy.txt"), emit: asmultipcf_purityploidy
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    library(ASCAT)
+
+    # Merge the per-sample ASCAT tables listed in files_string (space separated)
+    # into one multi-sample table and write it to out_file.
+    # Assumes the layout read by ascat.loadData: row names, Chromosome, Position,
+    # then one column per sample - TODO confirm against the upstream ASCAT outputs.
+    merge_ascat_tables <- function(files_string, out_file) {
+        # strsplit() returns a list; take element 1 so every path is read on its own
+        files <- strsplit(files_string, " ", fixed = TRUE)[[1]]
+        tables <- lapply(files, function(path) {
+            read.table(path, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE, comment.char = "")
+        })
+        loci <- rownames(tables[[1]])
+        for (tab in tables) {
+            if (!identical(rownames(tab), loci)) {
+                stop("ASCAT tables do not share the same loci: ", files_string, call. = FALSE)
+            }
+        }
+        # Keep Chromosome/Position once, then append only the sample columns
+        samples <- lapply(tables, function(tab) tab[, -c(1, 2), drop = FALSE])
+        merged <- cbind(tables[[1]][, c(1, 2), drop = FALSE], do.call(cbind, samples))
+        write.table(merged, file = out_file, sep = "\t", quote = FALSE, row.names = TRUE, col.names = NA)
+    }
+
+    merge_ascat_tables("${tumor_logr_files}", "combined_tumor_logr.txt")
+    merge_ascat_tables("${tumor_baf_files}", "combined_tumor_baf.txt")
+
+    # Load the data
+    ascat.bc <- ascat.loadData(
+        Tumor_LogR_file = "combined_tumor_logr.txt",
+        Tumor_BAF_file = "combined_tumor_baf.txt",
+        Germline_LogR_file = "$normal_logr_file",
+        Germline_BAF_file = "$normal_baf_file"
+    )
+
+    # Run multi-sample segmentation
+    ascat.bc <- ascat.asmultipcf(ascat.bc, penalty = ${params.ascat_asmultipcf_penalty ?: 5})
+
+    # Run ASCAT; gamma = 1 is the setting ASCAT documents for sequencing data
+    # (the 0.55 default is tuned for SNP arrays)
+    ascat.output <- ascat.runAscat(ascat.bc, gamma = 1)
+
+    # Write out segmented regions
+    write.table(ascat.output[["segments"]], file="${prefix}_asmultipcf_segments.txt", sep="\t", quote=FALSE, row.names=FALSE)
+
+    # Write out purity and ploidy info
+    purity_ploidy <- data.frame(
+        Sample = names(ascat.output\$aberrantcellfraction),
+        Purity = unlist(ascat.output\$aberrantcellfraction),
+        Ploidy = unlist(ascat.output\$ploidy)
+    )
+    write.table(purity_ploidy, file="${prefix}_asmultipcf_purityploidy.txt", sep="\t", quote=FALSE, row.names=FALSE)
+
+    # Version export
+    writeLines(c("\\"${task.process}\\":", paste0("    ascat: ", packageVersion("ASCAT"))), "versions.yml")
+    """
+}
diff --git a/modules/local/asmultipcf/meta.yml b/modules/local/asmultipcf/meta.yml
new file mode 100644
index 0000000000..db5d5b85c5
--- /dev/null
+++ b/modules/local/asmultipcf/meta.yml
@@ -0,0 +1,119 @@
+name: asmultipcf
+description: Performs multi-sample segmentation using ASCAT
+keywords:
+  - bam
+  - copy number
+  - cram
+tools:
+  - ascat:
+      description: ASCAT is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. ASCAT infers tumour purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour cell), expressed as multiples of haploid genomes from SNP array or massively parallel sequencing data, and calculates whole-genome allele-specific copy number profiles (the number of copies of both parental alleles for all SNP loci across the genome).
+      documentation: https://github.com/VanLoo-lab/ascat/tree/master/man
+      tool_dev_url: https://github.com/VanLoo-lab/ascat
+      doi: "10.1093/bioinformatics/btaa538"
+      licence: ["GPL v3"]
+input:
+  - args:
+      type: map
+      description: |
+        Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. Parameters must be set between quotes. (optional) parameters can be removed from the map, if they are not set. For default values, please check the documentation above.
+ + ``` + { + [ + "gender": "XX", + "genomeVersion": "hg19" + "purity": (optional), + "ploidy": (optional), + "gc_files": (optional), + "minCounts": (optional), + "BED_file": (optional) but recommended for WES, + "chrom_names": (optional), + "min_base_qual": (optional), + "min_map_qual": (optional), + "ref_fasta": (optional), + "skip_allele_counting_tumour": (optional), + "skip_allele_counting_normal": (optional) + ] + } + ``` + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation For modifying chromosome notation in bam files please follow https://josephcckuo.wordpress.com/2016/11/17/modify-chromosome-notation-in-bam-file/. + pattern: "*.{bam,cram}" + - index_normal: + type: file + description: index for normal_bam/cram + pattern: "*.{bai,crai}" + - input_tumor: + type: file + description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation + pattern: "*.{bam,cram}" + - index_tumor: + type: file + description: index for tumor_bam/cram + pattern: "*.{bai,crai}" + - allele_files: + type: file + description: allele files for ASCAT WGS. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + - loci_files: + type: file + description: loci files for ASCAT WGS. Loci files without chromosome notation can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS Make sure the chromosome notation matches the bam/cram input files. 
To add the chromosome notation to loci files (hg19/hg38) if necessary, you can run this command `if [[ $(samtools view | head -n1 | cut -f3)\" == *\"chr\"* ]]; then for i in {1..22} X; do sed -i 's/^/chr/' G1000_loci_hg19_chr_${i}.txt; done; fi` + - bed_file: + type: file + description: Bed file for ASCAT WES (optional, but recommended for WES) + - fasta: + type: file + description: Reference fasta file (optional) + - gc_file: + type: file + description: GC correction file (optional) - Used to do logR correction of the tumour sample(s) with genomic GC content + - rt_file: + type: file + description: replication timing correction file (optional, provide only in combination with gc_file) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - allelefreqs: + type: file + description: Files containing allee frequencies per chromosome + pattern: "*{alleleFrequencies_chr*.txt}" + - metrics: + type: file + description: File containing quality metrics + pattern: "*.{metrics.txt}" + - png: + type: file + description: ASCAT plots + pattern: "*.{png}" + - purityploidy: + type: file + description: File with purity and ploidy data + pattern: "*.{purityploidy.txt}" + - segments: + type: file + description: File with multi-sample segments data + pattern: "*.{asmultipcf_segments.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" +maintainers: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" diff --git a/modules/nf-core/asmultipcf/environment.yml b/modules/nf-core/asmultipcf/environment.yml new file mode 100644 index 0000000000..c436b22373 --- /dev/null +++ b/modules/nf-core/asmultipcf/environment.yml @@ -0,0 +1,8 @@ +name: asmultipcf +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - 
bioconda::ascat=3.1.1
+  - bioconda::cancerit-allelecount=4.3.0
diff --git a/modules/nf-core/asmultipcf/main.nf b/modules/nf-core/asmultipcf/main.nf
new file mode 100644
index 0000000000..b0e24d0e7b
--- /dev/null
+++ b/modules/nf-core/asmultipcf/main.nf
@@ -0,0 +1,81 @@
+// Multi-sample ASCAT segmentation: merges the per-tumor LogR/BAF tables of one patient,
+// runs ascat.asmultipcf + ascat.runAscat and writes segment and purity/ploidy tables.
+process ASMULTIPCF {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:ba3e6d2157eac2d38d22e62ec87675e12adb1010-0':
+        'biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:ba3e6d2157eac2d38d22e62ec87675e12adb1010-0' }"
+
+    input:
+    tuple val(meta), path(tumor_logr_files), path(tumor_baf_files), path(normal_logr_file), path(normal_baf_file)
+
+    output:
+    tuple val(meta), path("*_asmultipcf_segments.txt"), emit: asmultipcf_segments
+    tuple val(meta), path("*_asmultipcf_purityploidy.txt"), emit: asmultipcf_purityploidy
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    library(ASCAT)
+
+    # Merge the per-sample ASCAT tables listed in files_string (space separated)
+    # into one multi-sample table and write it to out_file.
+    # Assumes the layout read by ascat.loadData: row names, Chromosome, Position,
+    # then one column per sample - TODO confirm against the upstream ASCAT outputs.
+    merge_ascat_tables <- function(files_string, out_file) {
+        # strsplit() returns a list; take element 1 so every path is read on its own
+        files <- strsplit(files_string, " ", fixed = TRUE)[[1]]
+        tables <- lapply(files, function(path) {
+            read.table(path, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE, comment.char = "")
+        })
+        loci <- rownames(tables[[1]])
+        for (tab in tables) {
+            if (!identical(rownames(tab), loci)) {
+                stop("ASCAT tables do not share the same loci: ", files_string, call. = FALSE)
+            }
+        }
+        # Keep Chromosome/Position once, then append only the sample columns
+        samples <- lapply(tables, function(tab) tab[, -c(1, 2), drop = FALSE])
+        merged <- cbind(tables[[1]][, c(1, 2), drop = FALSE], do.call(cbind, samples))
+        write.table(merged, file = out_file, sep = "\t", quote = FALSE, row.names = TRUE, col.names = NA)
+    }
+
+    merge_ascat_tables("${tumor_logr_files}", "combined_tumor_logr.txt")
+    merge_ascat_tables("${tumor_baf_files}", "combined_tumor_baf.txt")
+
+    # Load the data
+    ascat.bc <- ascat.loadData(
+        Tumor_LogR_file = "combined_tumor_logr.txt",
+        Tumor_BAF_file = "combined_tumor_baf.txt",
+        Germline_LogR_file = "$normal_logr_file",
+        Germline_BAF_file = "$normal_baf_file"
+    )
+
+    # Run multi-sample segmentation
+    ascat.bc <- ascat.asmultipcf(ascat.bc, penalty = ${params.ascat_asmultipcf_penalty ?: 5})
+
+    # Run ASCAT; gamma = 1 is the setting ASCAT documents for sequencing data
+    # (the 0.55 default is tuned for SNP arrays)
+    ascat.output <- ascat.runAscat(ascat.bc, gamma = 1)
+
+    # Write out segmented regions
+    write.table(ascat.output[["segments"]], file="${prefix}_asmultipcf_segments.txt", sep="\t", quote=FALSE, row.names=FALSE)
+
+    # Write out purity and ploidy info
+    purity_ploidy <- data.frame(
+        Sample = names(ascat.output\$aberrantcellfraction),
+        Purity = unlist(ascat.output\$aberrantcellfraction),
+        Ploidy = unlist(ascat.output\$ploidy)
+    )
+    write.table(purity_ploidy, file="${prefix}_asmultipcf_purityploidy.txt", sep="\t", quote=FALSE, row.names=FALSE)
+
+    # Version export
+    writeLines(c("\\"${task.process}\\":", paste0("    ascat: ", packageVersion("ASCAT"))), "versions.yml")
+    """
+}
diff --git a/modules/nf-core/asmultipcf/meta.yml b/modules/nf-core/asmultipcf/meta.yml
new file mode 100644
index 0000000000..db5d5b85c5
--- /dev/null
+++ b/modules/nf-core/asmultipcf/meta.yml
@@ -0,0 +1,119 @@
+name: asmultipcf
+description: Performs multi-sample segmentation using ASCAT
+keywords:
+  - bam
+  - copy number
+  - cram
+tools:
+  - ascat:
+      description: ASCAT is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. ASCAT infers tumour purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour cell), expressed as multiples of haploid genomes from SNP array or massively parallel sequencing data, and calculates whole-genome allele-specific copy number profiles (the number of copies of both parental alleles for all SNP loci across the genome).
+ documentation: https://github.com/VanLoo-lab/ascat/tree/master/man + tool_dev_url: https://github.com/VanLoo-lab/ascat + doi: "10.1093/bioinformatics/btaa538" + licence: ["GPL v3"] +input: + - args: + type: map + description: | + Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. Parameters must be set between quotes. (optional) parameters can be removed from the map, if they are not set. For default values, please check the documentation above. + + ``` + { + [ + "gender": "XX", + "genomeVersion": "hg19" + "purity": (optional), + "ploidy": (optional), + "gc_files": (optional), + "minCounts": (optional), + "BED_file": (optional) but recommended for WES, + "chrom_names": (optional), + "min_base_qual": (optional), + "min_map_qual": (optional), + "ref_fasta": (optional), + "skip_allele_counting_tumour": (optional), + "skip_allele_counting_normal": (optional) + ] + } + ``` + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation For modifying chromosome notation in bam files please follow https://josephcckuo.wordpress.com/2016/11/17/modify-chromosome-notation-in-bam-file/. + pattern: "*.{bam,cram}" + - index_normal: + type: file + description: index for normal_bam/cram + pattern: "*.{bai,crai}" + - input_tumor: + type: file + description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation + pattern: "*.{bam,cram}" + - index_tumor: + type: file + description: index for tumor_bam/cram + pattern: "*.{bai,crai}" + - allele_files: + type: file + description: allele files for ASCAT WGS. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + - loci_files: + type: file + description: loci files for ASCAT WGS. 
Loci files without chromosome notation can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS Make sure the chromosome notation matches the bam/cram input files. To add the chromosome notation to loci files (hg19/hg38) if necessary, you can run this command `if [[ $(samtools view | head -n1 | cut -f3)\" == *\"chr\"* ]]; then for i in {1..22} X; do sed -i 's/^/chr/' G1000_loci_hg19_chr_${i}.txt; done; fi` + - bed_file: + type: file + description: Bed file for ASCAT WES (optional, but recommended for WES) + - fasta: + type: file + description: Reference fasta file (optional) + - gc_file: + type: file + description: GC correction file (optional) - Used to do logR correction of the tumour sample(s) with genomic GC content + - rt_file: + type: file + description: replication timing correction file (optional, provide only in combination with gc_file) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - allelefreqs: + type: file + description: Files containing allee frequencies per chromosome + pattern: "*{alleleFrequencies_chr*.txt}" + - metrics: + type: file + description: File containing quality metrics + pattern: "*.{metrics.txt}" + - png: + type: file + description: ASCAT plots + pattern: "*.{png}" + - purityploidy: + type: file + description: File with purity and ploidy data + pattern: "*.{purityploidy.txt}" + - segments: + type: file + description: File with multi-sample segments data + pattern: "*.{asmultipcf_segments.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" +maintainers: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" diff --git a/nextflow.config b/nextflow.config index f01ed136d2..3ac6cbb616 100644 --- a/nextflow.config +++ 
b/nextflow.config
@@ -73,6 +73,7 @@ params {
     ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
     joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected
     joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling
+    asmultipcf = false // if true, enables multiple sample copy number calling with ASCAT
     only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample
     sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope
     sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE'
@@ -300,6 +301,7 @@ profiles {
     tools_somatic { includeConfig 'conf/test/tools_somatic.config' }
     tools_somatic_ascat { includeConfig 'conf/test/tools_somatic_ascat.config' }
     tools_tumoronly { includeConfig 'conf/test/tools_tumoronly.config' }
+    tools_somatic_asmultipcf { includeConfig 'conf/test/tools_somatic_ascat_asmultipcf.config' }
     trimming { includeConfig 'conf/test/trimming.config' }
     umi { includeConfig 'conf/test/umi.config' }
     use_gatk_spark { includeConfig 'conf/test/use_gatk_spark.config' }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2e66ccdf53..1cf76e1ef4 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -1028,5 +1028,11 @@
         {
             "$ref": "#/$defs/generic_options"
         }
-    ]
+    ],
+    "properties": {
+        "asmultipcf": {
+            "type": "boolean",
+            "description": "Run ASCAT in multi-sample mode (asmultipcf) for patients with several tumor samples."
+        }
+    }
 }
diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf
index cdfabfc3ac..8878f2bd6f 100644
--- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf
@@ -7,6 +7,7 @@ include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_c
 include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_NORMAL } from '../bam_variant_calling_mpileup/main'
 include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_TUMOR } from '../bam_variant_calling_mpileup/main'
 include { BAM_VARIANT_CALLING_SOMATIC_ASCAT } from '../bam_variant_calling_somatic_ascat/main'
+
 include { BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC } from '../bam_variant_calling_somatic_controlfreec/main'
 include { BAM_VARIANT_CALLING_SOMATIC_MANTA } from '../bam_variant_calling_somatic_manta/main'
 include { BAM_VARIANT_CALLING_SOMATIC_MUTECT2 } from '../bam_variant_calling_somatic_mutect2/main'
@@ -41,6 +42,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
     gc_file // channel: [optional] ascat gc content file
     rt_file // channel: [optional] ascat rt file
     joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode
+    asmultipcf // boolean: [mandatory] [default: false] run ascat in multi-sample mode
     wes // boolean: [mandatory] [default: false] whether targeted data is processed
 
     main:
@@ -53,16 +55,18 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
     out_msisensorpro = Channel.empty()
     vcf_mutect2 = Channel.empty()
     vcf_tiddit = Channel.empty()
 
     // ASCAT
     if (tools.split(',').contains('ascat')) {
         BAM_VARIANT_CALLING_SOMATIC_ASCAT(
+            // meta.id is passed through unchanged so every tumor/normal pair stays distinguishable; patient-level grouping for asmultipcf happens inside the subworkflow
             cram,
             allele_files,
             loci_files,
             (wes ?
intervals_bed_combined : []), // No intervals needed if not WES
             fasta.map{ meta, fasta -> [ fasta ] },
             gc_file,
-            rt_file
+            rt_file,
+            asmultipcf // forward the multi-sample flag received by this workflow
         )
 
         versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ASCAT.out.versions)
diff --git a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
index 22802cfb58..e03b747e65 100644
--- a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
@@ -5,9 +5,9 @@
 // A when clause condition is defined in the conf/modules.config to determine if the module should be run
 
 include { ASCAT } from '../../../modules/nf-core/ascat/main'
+include { ASMULTIPCF } from '../../../modules/nf-core/asmultipcf/main'
 
 workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT {
-
     take:
     cram_pair // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai]
     allele_files // channel: [mandatory] zip
@@ -16,15 +16,49 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT {
     fasta // channel: [optional] fasta needed for cram
     gc_file // channel: [optional] txt for LogRCorrection
     rt_file // channel: [optional] txt for LogRCorrection
+    asmultipcf // boolean: [mandatory] whether to run ASMULTIPCF
 
     main:
-
     ch_versions = Channel.empty()
 
+    // Run ASCAT for every tumor/normal pair
     ASCAT(cram_pair, allele_files, loci_files, intervals_bed, fasta, gc_file, rt_file)
 
+    if (asmultipcf) {
+        // Collect the LogR/BAF outputs of all pairs that belong to the same patient
+        ascat_output_by_patient = ASCAT.out.logrs.join(ASCAT.out.bafs)
+            .map { meta, logr, baf -> [meta.patient, meta, logr, baf] }
+            .groupTuple()
+
+        // Prepare input for ASMULTIPCF
+        asmultipcf_input = ascat_output_by_patient
+            .map { patient, metas, logrs, bafs ->
+                // flatten() copes with ASCAT emitting either a single file or a list of files per pair
+                def all_logrs = logrs.flatten()
+                def all_bafs = bafs.flatten()
+                // Germline tracks carry 'normal' in their file name; everything else is tumor, whichever way 'tumor'/'tumour' is spelled
+                // NOTE(review): a sample id containing 'normal' would be misclassified - confirm the ASCAT output naming
+                def is_normal = { f -> f.name.contains('normal') }
+                def meta = metas[0] + [id: patient]
+                def tumor_logrs = all_logrs.findAll { !is_normal(it) }
+                def tumor_bafs = all_bafs.findAll { !is_normal(it) }
+                def normal_logr = all_logrs.find(is_normal)
+                def normal_baf = all_bafs.find(is_normal)
+                [meta, tumor_logrs, tumor_bafs, normal_logr, normal_baf]
+            }
+
+        // Run ASMULTIPCF
+        ASMULTIPCF(asmultipcf_input)
+
+        ch_versions = ch_versions.mix(ASMULTIPCF.out.versions)
+    }
+
     ch_versions = ch_versions.mix(ASCAT.out.versions)
 
     emit:
+    ascat_segments = ASCAT.out.segments
+    ascat_purityploidy = ASCAT.out.purityploidy
+    asmultipcf_segments = asmultipcf ? ASMULTIPCF.out.asmultipcf_segments : Channel.empty()
+    asmultipcf_purityploidy = asmultipcf ? ASMULTIPCF.out.asmultipcf_purityploidy : Channel.empty()
     versions = ch_versions
 }
diff --git a/tests/csv/3.0/ascat_somatic_asmultipcf.csv b/tests/csv/3.0/ascat_somatic_asmultipcf.csv
new file mode 100644
index 0000000000..012b1a26f0
--- /dev/null
+++ b/tests/csv/3.0/ascat_somatic_asmultipcf.csv
@@ -0,0 +1,4 @@
+patient,sex,status,sample,cram,crai
+test3,XX,0,sample3,HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram,HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai
+test3,XX,1,sample4,HG00146.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram,HG00146.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai
+test3,XX,1,sample5,HG00147.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram,HG00147.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai
diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf
index 7595c279e7..d11c610ed4 100644
--- a/workflows/sarek/main.nf
+++ b/workflows/sarek/main.nf
@@ -771,6 +771,7 @@ workflow SAREK {
             gc_file,
             rt_file,
             params.joint_mutect2,
+            params.asmultipcf,
             params.wes
         )