From f67ae86288c7bb8dd1c50153f97d98a6307d2af0 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 12:53:02 +0000 Subject: [PATCH 01/14] Fix minor umi dedup log issue --- workflows/rnaseq/nextflow.config | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index e7a6290fa..ed033088a 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -140,12 +140,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { [ path: { "${params.outdir}/${params.aligner}/umitools" }, mode: params.publish_dir_mode, - pattern: '*.tsv' - ], - [ - path: { "${params.outdir}/${params.aligner}/umitools/transcriptomic_dedup_log" }, - mode: params.publish_dir_mode, - pattern: '*.log' + pattern: '*.{log,tsv}' ], [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, From 37b52b5b783e849497daa9728ba9b073b6fe99d0 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 13:14:28 +0000 Subject: [PATCH 02/14] better log file fix --- conf/test.config | 4 +++ workflows/rnaseq/nextflow.config | 57 ++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/conf/test.config b/conf/test.config index 153200306..23331e60a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -41,6 +41,10 @@ params { skip_bbsplit = false pseudo_aligner = 'salmon' umitools_bc_pattern = 'NNNN' + + remove_ribo_rna= true + sortmerna_index = '/Users/jonathan.manning/projects/rnaseq/idx' + skip_multiqc = true } // When using RSEM, remove warning from STAR whilst building tiny indices diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index ed033088a..88a8749d7 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -137,11 +137,6 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { 
withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:UMI(COLLAPSE|TOOLS_DEDUP)' { ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/umitools" }, - mode: params.publish_dir_mode, - pattern: '*.{log,tsv}' - ], [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, @@ -240,20 +235,60 @@ if (!params.skip_alignment) { ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ [ - path: { "${params.outdir}/${params.aligner}/umitools" }, + path: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, - pattern: '*.tsv' - ], + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? it : null } + ] + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME:UMICOLLAPSE' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/umicollapse/genomic_dedup_log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME:UMICOLLAPSE' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/umicollapse/transcriptomic_dedup_log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:UMITOOLS' { + publishDir = [ [ path: { "${params.outdir}/${params.aligner}/umitools/genomic_dedup_log" }, mode: params.publish_dir_mode, pattern: '*.log' + ] + ] + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:UMITOOLS' { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/umitools/transcriptomic_dedup_log" }, + mode: params.publish_dir_mode, + pattern: '*.log' ], + ] + } + + withName: 
'.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_(GEN_TRANSCRIP)TOME:UMITOOLS' { + publishDir = [ [ - path: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, + path: { "${params.outdir}/${params.aligner}/umitools" }, mode: params.publish_dir_mode, - pattern: '*.bam', - saveAs: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? it : null } + pattern: '*.tsv' ] ] } From e8459aa16fd592bd9072dda0da6c2d49501ddfe5 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 13:15:26 +0000 Subject: [PATCH 03/14] undo config change --- conf/test.config | 4 ---- 1 file changed, 4 deletions(-) diff --git a/conf/test.config b/conf/test.config index 23331e60a..153200306 100644 --- a/conf/test.config +++ b/conf/test.config @@ -41,10 +41,6 @@ params { skip_bbsplit = false pseudo_aligner = 'salmon' umitools_bc_pattern = 'NNNN' - - remove_ribo_rna= true - sortmerna_index = '/Users/jonathan.manning/projects/rnaseq/idx' - skip_multiqc = true } // When using RSEM, remove warning from STAR whilst building tiny indices From 413827037aff62c2e0ceac5502fb163df295baf9 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 13:30:20 +0000 Subject: [PATCH 04/14] tiny fix --- workflows/rnaseq/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index 88a8749d7..cdbddc317 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -283,7 +283,7 @@ if (!params.skip_alignment) { ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_(GEN_TRANSCRIP)TOME:UMITOOLS' { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_(GEN_TRANSCRIPT)OME:UMITOOLS' { publishDir = [ [ path: { "${params.outdir}/${params.aligner}/umitools" }, From 16148b9a8aa6cf5251f6e20d5406ab09dad7b4cd Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 13:31:26 +0000 
Subject: [PATCH 05/14] tiny fix --- workflows/rnaseq/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index cdbddc317..c9229a45f 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -283,7 +283,7 @@ if (!params.skip_alignment) { ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_(GEN_TRANSCRIPT)OME:UMITOOLS' { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_(GEN|TRANSCRIPT)OME:UMITOOLS' { publishDir = [ [ path: { "${params.outdir}/${params.aligner}/umitools" }, From 685c1b15ae7ea344c016ab139ea909fb57b734c8 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 21:27:21 +0000 Subject: [PATCH 06/14] Tidy up umitools/ umicollapse config --- workflows/rnaseq/nextflow.config | 96 +++++++++++++++++++------------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/workflows/rnaseq/nextflow.config b/workflows/rnaseq/nextflow.config index c9229a45f..a621b3bbe 100644 --- a/workflows/rnaseq/nextflow.config +++ b/workflows/rnaseq/nextflow.config @@ -134,14 +134,49 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') { ] } + // Use the same umi_dedup prefix for umitools and umicollapse + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_TRANSCRIPTOME:UMI(COLLAPSE|TOOLS_DEDUP)' { ext.prefix = { "${meta.id}.umi_dedup.transcriptome.sorted" } + } + + // Publishing logic for umitools: + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:UMITOOLS_DEDUP' { publishDir = [ [ path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, pattern: '*.bam', saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? 
it : null } + ], + [ + path: { "${params.outdir}/${params.aligner}/umitools/transcriptomic_dedup_log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + [ + path: { "${params.outdir}/${params.aligner}/umitools" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + ] + } + + // Publishing logic for umicollapse + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME:UMICOLLAPSE' { + publishDir = [ + [ + path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ], + [ + path: { "${params.outdir}/${params.aligner}/umicollapse/transcriptomic_dedup_log" }, + mode: params.publish_dir_mode, + pattern: '*.log' ] ] } @@ -231,64 +266,49 @@ if (!params.skip_alignment) { ].join(' ').trim()} } + // Use the same umi_dedup prefix for umitools and umicollapse + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMI(COLLAPSE|TOOLS)_GENOME:UMI(COLLAPSE|TOOLS_DEDUP)' { ext.prefix = { "${meta.id}.umi_dedup.sorted" } - publishDir = [ - [ - path: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, - mode: params.publish_dir_mode, - pattern: '*.bam', - saveAs: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? 
it : null } - ] - ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME:UMICOLLAPSE' { - publishDir = [ - [ - path: { "${params.outdir}/${params.aligner}/umicollapse/genomic_dedup_log" }, - mode: params.publish_dir_mode, - pattern: '*.log' - ] - ] - } + // Publishing logic for umitools: - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_TRANSCRIPTOME:UMICOLLAPSE' { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:UMITOOLS_DEDUP' { publishDir = [ [ - path: { "${params.outdir}/${params.aligner}/umicollapse/transcriptomic_dedup_log" }, + path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, - pattern: '*.log' - ] - ] - } - - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME:UMITOOLS' { - publishDir = [ + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } + ], [ path: { "${params.outdir}/${params.aligner}/umitools/genomic_dedup_log" }, mode: params.publish_dir_mode, pattern: '*.log' + ], + [ + path: { "${params.outdir}/${params.aligner}/umitools" }, + mode: params.publish_dir_mode, + pattern: '*.tsv' ] ] } - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME:UMITOOLS' { + // Publishing logic for umicollapse + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMICOLLAPSE_GENOME:UMICOLLAPSE' { publishDir = [ [ - path: { "${params.outdir}/${params.aligner}/umitools/transcriptomic_dedup_log" }, + path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, - pattern: '*.log' + pattern: '*.bam', + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? 
it : null } ], - ] - } - - withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_(GEN|TRANSCRIPT)OME:UMITOOLS' { - publishDir = [ [ - path: { "${params.outdir}/${params.aligner}/umitools" }, + path: { "${params.outdir}/${params.aligner}/umicollapse/genomic_dedup_log" }, mode: params.publish_dir_mode, - pattern: '*.tsv' + pattern: '*.log' ] ] } @@ -297,10 +317,10 @@ if (!params.skip_alignment) { ext.args = { params.bam_csi_index ? '-c' : '' } ext.prefix = { "${meta.id}.umi_dedup.sorted" } publishDir = [ - path: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, + path: { params.save_align_intermeds || params.save_umi_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}', - saveAs: { params.save_align_intermeds || params.with_umi || params.save_umi_intermeds ? it : null } + saveAs: { params.save_align_intermeds || params.save_umi_intermeds ? it : null } ] } From cd97229b9d954931fe7388a201c1bf675a2e5a95 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 21:29:49 +0000 Subject: [PATCH 07/14] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb3b78e7a..18f790c09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Special thanks to the following for their contributions to the release: - [PR #1471](https://github.com/nf-core/rnaseq/pull/1471) - Fix prepare_genome subworkflow for sortmerna - [PR #1473](https://github.com/nf-core/rnaseq/pull/1473) - Bump STAR modules - [PR #1474](https://github.com/nf-core/rnaseq/pull/1474) - Bump versions to 3.18.0 +- [PR #1475](https://github.com/nf-core/rnaseq/pull/1475) - Fix log publishing around umitools/ umicollapse ## Parameters From bdcd760196c62fa41a3a3fe8b8c29b7ea240149d Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 22:16:10 +0000 Subject: [PATCH 08/14] Update UMI 
test --- tests/umi.nf.test | 2 + tests/umi.nf.test.snap | 122 ++++++++++++++++++++++++++++++++--------- 2 files changed, 97 insertions(+), 27 deletions(-) diff --git a/tests/umi.nf.test b/tests/umi.nf.test index dba4c07fe..234db5908 100644 --- a/tests/umi.nf.test +++ b/tests/umi.nf.test @@ -14,7 +14,9 @@ nextflow_pipeline { skip_bbsplit = true umi_dedup_tool = 'umicollapse' aligner = 'hisat2' + publish_dir_mode = 'link' outdir = "$outputDir" + save_umi_intermeds = true } } diff --git a/tests/umi.nf.test.snap b/tests/umi.nf.test.snap index 4e153a33b..e9d1efe10 100644 --- a/tests/umi.nf.test.snap +++ b/tests/umi.nf.test.snap @@ -612,6 +612,10 @@ "star_salmon/RAP1_IAA_30M_REP1", "star_salmon/RAP1_IAA_30M_REP1.umi_dedup.sorted.bam", "star_salmon/RAP1_IAA_30M_REP1.umi_dedup.sorted.bam.bai", + "star_salmon/RAP1_IAA_30M_REP1.umi_dedup.transcriptome.bam", + "star_salmon/RAP1_IAA_30M_REP1.umi_dedup.transcriptome.filtered.bam", + "star_salmon/RAP1_IAA_30M_REP1.umi_dedup.transcriptome.sorted.bam", + "star_salmon/RAP1_IAA_30M_REP1.umi_dedup.transcriptome.sorted.bam.bai", "star_salmon/RAP1_IAA_30M_REP1/aux_info", "star_salmon/RAP1_IAA_30M_REP1/aux_info/ambig_info.tsv", "star_salmon/RAP1_IAA_30M_REP1/aux_info/expected_bias.gz", @@ -629,6 +633,9 @@ "star_salmon/RAP1_UNINDUCED_REP1", "star_salmon/RAP1_UNINDUCED_REP1.umi_dedup.sorted.bam", "star_salmon/RAP1_UNINDUCED_REP1.umi_dedup.sorted.bam.bai", + "star_salmon/RAP1_UNINDUCED_REP1.umi_dedup.transcriptome.bam", + "star_salmon/RAP1_UNINDUCED_REP1.umi_dedup.transcriptome.sorted.bam", + "star_salmon/RAP1_UNINDUCED_REP1.umi_dedup.transcriptome.sorted.bam.bai", "star_salmon/RAP1_UNINDUCED_REP1/aux_info", "star_salmon/RAP1_UNINDUCED_REP1/aux_info/ambig_info.tsv", "star_salmon/RAP1_UNINDUCED_REP1/aux_info/expected_bias.gz", @@ -646,6 +653,9 @@ "star_salmon/RAP1_UNINDUCED_REP2", "star_salmon/RAP1_UNINDUCED_REP2.umi_dedup.sorted.bam", "star_salmon/RAP1_UNINDUCED_REP2.umi_dedup.sorted.bam.bai", + 
"star_salmon/RAP1_UNINDUCED_REP2.umi_dedup.transcriptome.bam", + "star_salmon/RAP1_UNINDUCED_REP2.umi_dedup.transcriptome.sorted.bam", + "star_salmon/RAP1_UNINDUCED_REP2.umi_dedup.transcriptome.sorted.bam.bai", "star_salmon/RAP1_UNINDUCED_REP2/aux_info", "star_salmon/RAP1_UNINDUCED_REP2/aux_info/ambig_info.tsv", "star_salmon/RAP1_UNINDUCED_REP2/aux_info/expected_bias.gz", @@ -663,6 +673,10 @@ "star_salmon/WT_REP1", "star_salmon/WT_REP1.umi_dedup.sorted.bam", "star_salmon/WT_REP1.umi_dedup.sorted.bam.bai", + "star_salmon/WT_REP1.umi_dedup.transcriptome.bam", + "star_salmon/WT_REP1.umi_dedup.transcriptome.filtered.bam", + "star_salmon/WT_REP1.umi_dedup.transcriptome.sorted.bam", + "star_salmon/WT_REP1.umi_dedup.transcriptome.sorted.bam.bai", "star_salmon/WT_REP1/aux_info", "star_salmon/WT_REP1/aux_info/ambig_info.tsv", "star_salmon/WT_REP1/aux_info/expected_bias.gz", @@ -680,6 +694,10 @@ "star_salmon/WT_REP2", "star_salmon/WT_REP2.umi_dedup.sorted.bam", "star_salmon/WT_REP2.umi_dedup.sorted.bam.bai", + "star_salmon/WT_REP2.umi_dedup.transcriptome.bam", + "star_salmon/WT_REP2.umi_dedup.transcriptome.filtered.bam", + "star_salmon/WT_REP2.umi_dedup.transcriptome.sorted.bam", + "star_salmon/WT_REP2.umi_dedup.transcriptome.sorted.bam.bai", "star_salmon/WT_REP2/aux_info", "star_salmon/WT_REP2/aux_info/ambig_info.tsv", "star_salmon/WT_REP2/aux_info/expected_bias.gz", @@ -1261,10 +1279,18 @@ "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt", "umitools", "umitools/RAP1_IAA_30M_REP1.umi_extract.log", + "umitools/RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz", + "umitools/RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz", + "umitools/RAP1_UNINDUCED_REP1.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP1.umi_extract.log", + "umitools/RAP1_UNINDUCED_REP2.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP2.umi_extract.log", "umitools/WT_REP1.umi_extract.log", - "umitools/WT_REP2.umi_extract.log" + "umitools/WT_REP1.umi_extract_1.fastq.gz", + 
"umitools/WT_REP1.umi_extract_2.fastq.gz", + "umitools/WT_REP2.umi_extract.log", + "umitools/WT_REP2.umi_extract_1.fastq.gz", + "umitools/WT_REP2.umi_extract_2.fastq.gz" ], [ "genome_gfp.fasta:md5,e23e302af63736a199985a169fdac055", @@ -1467,14 +1493,22 @@ "WT_REP2.umi_dedup.sorted_per_umi_per_position.tsv:md5,6f5656947a7f0076df446e6f40430027", "WT_REP2.umi_dedup.transcriptome.sorted_edit_distance.tsv:md5,3e3c6a7e8996e566350742e9911366d3", "WT_REP2.umi_dedup.transcriptome.sorted_per_umi.tsv:md5,0c986c4cb7a77f650a19e2c454b9b179", - "WT_REP2.umi_dedup.transcriptome.sorted_per_umi_per_position.tsv:md5,af9028dbdab81de3854a32cd1d19ac8b" + "WT_REP2.umi_dedup.transcriptome.sorted_per_umi_per_position.tsv:md5,af9028dbdab81de3854a32cd1d19ac8b", + "RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz:md5,e83d7f738fbbfaa541a2e71fe4663447", + "RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz:md5,4f2873cbf584d6e84187238a4ae2b8fa", + "RAP1_UNINDUCED_REP1.umi_extract.fastq.gz:md5,9e42242fd68baac592140f63a8a716ce", + "RAP1_UNINDUCED_REP2.umi_extract.fastq.gz:md5,5a92b642927b8603c4765e5305e23e9c", + "WT_REP1.umi_extract_1.fastq.gz:md5,f312fac9c384a889ae4f959839263604", + "WT_REP1.umi_extract_2.fastq.gz:md5,ffca24924108fd54151620b7538b9e1a", + "WT_REP2.umi_extract_1.fastq.gz:md5,c3180451a24ce51fc35c1684521ae287", + "WT_REP2.umi_extract_2.fastq.gz:md5,067ff23f8d1307ad241cd70bc186b5c1" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T18:07:55.751564456" + "timestamp": "2024-12-19T21:54:27.627773754" }, "Params: --aligner hisat2 --umi_dedup_tool 'umicollapse'": { "content": [ @@ -2130,13 +2164,13 @@ "hisat2/stringtie/WT_REP2.coverage.gtf", "hisat2/stringtie/WT_REP2.gene.abundance.txt", "hisat2/stringtie/WT_REP2.transcripts.gtf", - "hisat2/umitools", - "hisat2/umitools/genomic_dedup_log", - "hisat2/umitools/genomic_dedup_log/RAP1_IAA_30M_REP1.umi_dedup.sorted_UMICollapse.log", - 
"hisat2/umitools/genomic_dedup_log/RAP1_UNINDUCED_REP1.umi_dedup.sorted_UMICollapse.log", - "hisat2/umitools/genomic_dedup_log/RAP1_UNINDUCED_REP2.umi_dedup.sorted_UMICollapse.log", - "hisat2/umitools/genomic_dedup_log/WT_REP1.umi_dedup.sorted_UMICollapse.log", - "hisat2/umitools/genomic_dedup_log/WT_REP2.umi_dedup.sorted_UMICollapse.log", + "hisat2/umicollapse", + "hisat2/umicollapse/genomic_dedup_log", + "hisat2/umicollapse/genomic_dedup_log/RAP1_IAA_30M_REP1.umi_dedup.sorted_UMICollapse.log", + "hisat2/umicollapse/genomic_dedup_log/RAP1_UNINDUCED_REP1.umi_dedup.sorted_UMICollapse.log", + "hisat2/umicollapse/genomic_dedup_log/RAP1_UNINDUCED_REP2.umi_dedup.sorted_UMICollapse.log", + "hisat2/umicollapse/genomic_dedup_log/WT_REP1.umi_dedup.sorted_UMICollapse.log", + "hisat2/umicollapse/genomic_dedup_log/WT_REP2.umi_dedup.sorted_UMICollapse.log", "multiqc", "multiqc/hisat2", "multiqc/hisat2/multiqc_report.html", @@ -2548,10 +2582,18 @@ "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt", "umitools", "umitools/RAP1_IAA_30M_REP1.umi_extract.log", + "umitools/RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz", + "umitools/RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz", + "umitools/RAP1_UNINDUCED_REP1.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP1.umi_extract.log", + "umitools/RAP1_UNINDUCED_REP2.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP2.umi_extract.log", "umitools/WT_REP1.umi_extract.log", - "umitools/WT_REP2.umi_extract.log" + "umitools/WT_REP1.umi_extract_1.fastq.gz", + "umitools/WT_REP1.umi_extract_2.fastq.gz", + "umitools/WT_REP2.umi_extract.log", + "umitools/WT_REP2.umi_extract_1.fastq.gz", + "umitools/WT_REP2.umi_extract_2.fastq.gz" ], [ "genome_gfp.fasta:md5,e23e302af63736a199985a169fdac055", @@ -2621,6 +2663,11 @@ "e_data.ctab:md5,593daf0b84f008c0c64319990ef2fd42", "i2t.ctab:md5,dda3d3ccd7d4184d947c654ae73efb7b", "i_data.ctab:md5,01c675638f918a70ea689f618a2610bc", + 
"RAP1_IAA_30M_REP1.umi_dedup.sorted_UMICollapse.log:md5,d6eaaabd7053bef88f3af2b6fc2df45b", + "RAP1_UNINDUCED_REP1.umi_dedup.sorted_UMICollapse.log:md5,441be871ac12aa94f871fab9e8b6b4bf", + "RAP1_UNINDUCED_REP2.umi_dedup.sorted_UMICollapse.log:md5,8a0025a58969badeb3dc35a7c1bdd68e", + "WT_REP1.umi_dedup.sorted_UMICollapse.log:md5,75e6da19996c3ea6fde5cdd4be2dcfda", + "WT_REP2.umi_dedup.sorted_UMICollapse.log:md5,87cdd1eadc3db19bd9f45bb941b1e1a7", "cutadapt_filtered_reads_plot.txt:md5,3f122969fa288888e5abef061b7963f2", "cutadapt_trimmed_sequences_plot_3_Counts.txt:md5,5e8a821c9a4deb46c11bc65969b8864f", "cutadapt_trimmed_sequences_plot_3_Obs_Exp.txt:md5,bf8abefa7c5f2f1e1140749983279d9d", @@ -2688,22 +2735,27 @@ "cmd_info.json:md5,809380ddce725a8fab75dd7741b64bf6", "lib_format_counts.json:md5,d231ba7624b67eb654989f69530e2925", "R_sessionInfo.log:md5,fb0da0d7ad6994ed66a8e68348b19676", - "tx2gene.tsv:md5,0e2418a69d2eba45097ebffc2f700bfe" + "tx2gene.tsv:md5,0e2418a69d2eba45097ebffc2f700bfe", + "RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz:md5,e83d7f738fbbfaa541a2e71fe4663447", + "RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz:md5,4f2873cbf584d6e84187238a4ae2b8fa", + "RAP1_UNINDUCED_REP1.umi_extract.fastq.gz:md5,9e42242fd68baac592140f63a8a716ce", + "RAP1_UNINDUCED_REP2.umi_extract.fastq.gz:md5,5a92b642927b8603c4765e5305e23e9c", + "WT_REP1.umi_extract_1.fastq.gz:md5,f312fac9c384a889ae4f959839263604", + "WT_REP1.umi_extract_2.fastq.gz:md5,ffca24924108fd54151620b7538b9e1a", + "WT_REP2.umi_extract_1.fastq.gz:md5,c3180451a24ce51fc35c1684521ae287", + "WT_REP2.umi_extract_2.fastq.gz:md5,067ff23f8d1307ad241cd70bc186b5c1" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T18:01:45.228731692" + "timestamp": "2024-12-19T22:07:42.25625667" }, "--umi_dedup_tool 'umitools - stub": { "content": [ - 32, + 31, { - "BBMAP_BBSPLIT": { - "bbmap": 39.1 - }, "CAT_FASTQ": { "cat": 9.5 }, @@ -2793,20 +2845,36 @@ 
"trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt", "umitools", "umitools/RAP1_IAA_30M_REP1.umi_extract.log", + "umitools/RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz", + "umitools/RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz", + "umitools/RAP1_UNINDUCED_REP1.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP1.umi_extract.log", + "umitools/RAP1_UNINDUCED_REP2.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP2.umi_extract.log", "umitools/WT_REP1.umi_extract.log", - "umitools/WT_REP2.umi_extract.log" + "umitools/WT_REP1.umi_extract_1.fastq.gz", + "umitools/WT_REP1.umi_extract_2.fastq.gz", + "umitools/WT_REP2.umi_extract.log", + "umitools/WT_REP2.umi_extract_1.fastq.gz", + "umitools/WT_REP2.umi_extract_2.fastq.gz" ], [ "genome_transcriptome.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", - "genome_transcriptome.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + "genome_transcriptome.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "RAP1_UNINDUCED_REP1.umi_extract.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "RAP1_UNINDUCED_REP2.umi_extract.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "WT_REP1.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "WT_REP1.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "WT_REP2.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "WT_REP2.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": "2024-12-11T18:08:48.404716766" + "timestamp": "2024-12-19T21:55:24.38516136" } -} +} \ No newline at end of file From b799aa3ed291741e0221ea9a7bd2a21f34e5ca40 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 22:17:37 +0000 Subject: [PATCH 09/14] Update umi.nf.test --- tests/umi.nf.test | 1 - 1 
file changed, 1 deletion(-) diff --git a/tests/umi.nf.test b/tests/umi.nf.test index 234db5908..5ca3d2420 100644 --- a/tests/umi.nf.test +++ b/tests/umi.nf.test @@ -14,7 +14,6 @@ nextflow_pipeline { skip_bbsplit = true umi_dedup_tool = 'umicollapse' aligner = 'hisat2' - publish_dir_mode = 'link' outdir = "$outputDir" save_umi_intermeds = true } From 6ce70962cbc05b309d57acb3211985ca96b236f7 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 22:36:45 +0000 Subject: [PATCH 10/14] Exclude umi logs --- tests/.nftignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/.nftignore b/tests/.nftignore index 7f3fad699..8043747d3 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -32,7 +32,7 @@ umitools/*.umi_extract.log {hisat2,star_rsem,star_salmon}/stringtie/*.ballgown/t_data.ctab {hisat2,star_rsem,star_salmon}/stringtie/*.gene.abundance.txt {hisat2,star_rsem,star_salmon}/stringtie/*.{coverage,transcripts}.gtf -{hisat2,star_rsem,star_salmon}/umitools/genomic_dedup_log/*_UMICollapse.log +{hisat2,star_rsem,star_salmon}/{umitools,umicollapse}/{genomic,transcriptomic}_dedup_log/*.log {multiqc,multiqc/**}/multiqc_report.html {multiqc,multiqc/**}/multiqc_report_data/fastqc_{raw,trimmed}_top_overrepresented_sequences_table.txt {multiqc,multiqc/**}/multiqc_report_data/hisat2_pe_plot.txt From cb59092581424f893baeddd999176ade4d73ede7 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 22:45:18 +0000 Subject: [PATCH 11/14] Update outputs in docs --- docs/output.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/output.md b/docs/output.md index 0bea48601..34326ea23 100644 --- a/docs/output.md +++ b/docs/output.md @@ -120,7 +120,7 @@ If multiple libraries/runs have been provided for the same sample in the input s -[UMI-tools](https://github.com/CGATOxford/UMI-tools) deduplicates reads based on unique molecular identifiers (UMIs) to address PCR-bias. 
Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI-tools dedup](#umi-tools-dedup) section. +[UMI-tools](https://github.com/CGATOxford/UMI-tools) and [UMICollapse](https://github.com/Daniel-Liu-c0deb0t/UMICollapse) deduplicate reads based on unique molecular identifiers (UMIs) to address PCR-bias. Firstly, the UMI-tools `extract` command removes the UMI barcode information from the read sequence and adds it to the read name. Secondly, reads are deduplicated based on UMI identifier after mapping as highlighted in the [UMI dedup](#umi-dedup) section. To facilitate processing of input data which has the UMI barcode already embedded in the read name from the start, `--skip_umi_extract` can be specified in conjunction with `--with_umi`. @@ -305,7 +305,7 @@ The original BAM files generated by the selected alignment algorithm are further ![MultiQC - SAMtools mapped reads per contig plot](images/mqc_samtools_idxstats.png) -### UMI-tools dedup +### UMI dedup
Output files @@ -314,7 +314,7 @@ The original BAM files generated by the selected alignment algorithm are further - `.umi_dedup.sorted.bam`: If `--save_umi_intermeds` is specified the UMI deduplicated, coordinate sorted BAM file containing read alignments will be placed in this directory. - `.umi_dedup.sorted.bam.bai`: If `--save_umi_intermeds` is specified the BAI index file for the UMI deduplicated, coordinate sorted BAM file will be placed in this directory. - `.umi_dedup.sorted.bam.csi`: If `--save_umi_intermeds --bam_csi_index` is specified the CSI index file for the UMI deduplicated, coordinate sorted BAM file will be placed in this directory. -- `/umitools/` +- `/umitools/` (UMI-tools only) - `*_edit_distance.tsv`: Reports the (binned) average edit distance between the UMIs at each position. - `*_per_umi.tsv`: UMI-level summary statistics. - `*_per_umi_per_position.tsv`: Tabulates the counts for unique combinations of UMI and position. @@ -323,7 +323,7 @@ The content of the files above is explained in more detail in the [UMI-tools doc
-After extracting the UMI information from the read sequence (see [UMI-tools extract](#umi-tools-extract)), the second step in the removal of UMI barcodes involves deduplicating the reads based on both mapping and UMI barcode information using the UMI-tools `dedup` command. This will generate a filtered BAM file after the removal of PCR duplicates. +After extracting the UMI information from the read sequence (see [UMI-tools extract](#umi-tools-extract)), the second step in the removal of UMI barcodes involves deduplicating the reads based on both mapping and UMI barcode information. UMI deduplication can be carried out either with [UMI-tools](https://github.com/CGATOxford/UMI-tools) or [UMICollapse](https://github.com/Daniel-Liu-c0deb0t/UMICollapse), set via the `--umi_dedup_tool` parameter. Both tools publish their deduplicated BAM files under the same names and in the same locations, though UMI-tools has some additional outputs, as described above. Either method will generate a filtered BAM file after the removal of PCR duplicates. ### picard MarkDuplicates From c98f8f3db35fcfbf50fc650b16a5be972c4c9dfb Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 22:45:49 +0000 Subject: [PATCH 12/14] Fix snapshot after log exclusion --- tests/umi.nf.test.snap | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/umi.nf.test.snap b/tests/umi.nf.test.snap index e9d1efe10..a5d06e069 100644 --- a/tests/umi.nf.test.snap +++ b/tests/umi.nf.test.snap @@ -2663,11 +2663,6 @@ "e_data.ctab:md5,593daf0b84f008c0c64319990ef2fd42", "i2t.ctab:md5,dda3d3ccd7d4184d947c654ae73efb7b", "i_data.ctab:md5,01c675638f918a70ea689f618a2610bc", - "RAP1_IAA_30M_REP1.umi_dedup.sorted_UMICollapse.log:md5,d6eaaabd7053bef88f3af2b6fc2df45b", - "RAP1_UNINDUCED_REP1.umi_dedup.sorted_UMICollapse.log:md5,441be871ac12aa94f871fab9e8b6b4bf", - "RAP1_UNINDUCED_REP2.umi_dedup.sorted_UMICollapse.log:md5,8a0025a58969badeb3dc35a7c1bdd68e", - "WT_REP1.umi_dedup.sorted_UMICollapse.log:md5,75e6da19996c3ea6fde5cdd4be2dcfda", - 
"WT_REP2.umi_dedup.sorted_UMICollapse.log:md5,87cdd1eadc3db19bd9f45bb941b1e1a7", "cutadapt_filtered_reads_plot.txt:md5,3f122969fa288888e5abef061b7963f2", "cutadapt_trimmed_sequences_plot_3_Counts.txt:md5,5e8a821c9a4deb46c11bc65969b8864f", "cutadapt_trimmed_sequences_plot_3_Obs_Exp.txt:md5,bf8abefa7c5f2f1e1140749983279d9d", @@ -2750,7 +2745,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2024-12-19T22:07:42.25625667" + "timestamp": "2024-12-19T22:33:42.012684597" }, "--umi_dedup_tool 'umitools - stub": { "content": [ From 311b63396f4343a99ac9b060244da3d8897df87b Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 19 Dec 2024 23:31:52 +0000 Subject: [PATCH 13/14] Add keep intermeds to test file --- tests/umi.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/umi.nf.test b/tests/umi.nf.test index 5ca3d2420..3a7083a28 100644 --- a/tests/umi.nf.test +++ b/tests/umi.nf.test @@ -50,6 +50,7 @@ nextflow_pipeline { umitools_dedup_stats = true skip_bbsplit = true outdir = "$outputDir" + save_umi_intermeds = true } } From 0a21c3f117ae3d6d850aca9781a2b49e42714d40 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 20 Dec 2024 00:03:58 +0000 Subject: [PATCH 14/14] Fix snapshot --- tests/umi.nf.test.snap | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/tests/umi.nf.test.snap b/tests/umi.nf.test.snap index a5d06e069..1d0df4de3 100644 --- a/tests/umi.nf.test.snap +++ b/tests/umi.nf.test.snap @@ -1508,7 +1508,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2024-12-19T21:54:27.627773754" + "timestamp": "2024-12-20T00:02:04.611696704" }, "Params: --aligner hisat2 --umi_dedup_tool 'umicollapse'": { "content": [ @@ -2749,8 +2749,11 @@ }, "--umi_dedup_tool 'umitools - stub": { "content": [ - 31, + 32, { + "BBMAP_BBSPLIT": { + "bbmap": 39.1 + }, "CAT_FASTQ": { "cat": 9.5 }, @@ -2840,36 +2843,20 @@ "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt", "umitools", 
"umitools/RAP1_IAA_30M_REP1.umi_extract.log", - "umitools/RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz", - "umitools/RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz", - "umitools/RAP1_UNINDUCED_REP1.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP1.umi_extract.log", - "umitools/RAP1_UNINDUCED_REP2.umi_extract.fastq.gz", "umitools/RAP1_UNINDUCED_REP2.umi_extract.log", "umitools/WT_REP1.umi_extract.log", - "umitools/WT_REP1.umi_extract_1.fastq.gz", - "umitools/WT_REP1.umi_extract_2.fastq.gz", - "umitools/WT_REP2.umi_extract.log", - "umitools/WT_REP2.umi_extract_1.fastq.gz", - "umitools/WT_REP2.umi_extract_2.fastq.gz" + "umitools/WT_REP2.umi_extract.log" ], [ "genome_transcriptome.fasta:md5,d41d8cd98f00b204e9800998ecf8427e", - "genome_transcriptome.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", - "RAP1_IAA_30M_REP1.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "RAP1_IAA_30M_REP1.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "RAP1_UNINDUCED_REP1.umi_extract.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "RAP1_UNINDUCED_REP2.umi_extract.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "WT_REP1.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "WT_REP1.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "WT_REP2.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "WT_REP2.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "genome_transcriptome.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2024-12-19T21:55:24.38516136" + "timestamp": "2024-12-19T23:28:01.570835895" } } \ No newline at end of file