Skip to content

Commit

Permalink
changed gene counts merge to use paste instead of csvtk
Browse files Browse the repository at this point in the history
  • Loading branch information
kerimoff committed May 3, 2022
1 parent 04487a4 commit 0f96d38
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions modules/featureCounts.nf
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@ process merge_featureCounts {
path 'merged_gene_counts.tsv.gz'

script:
// If there is only one input file, just cat it and pipe the output to csvtk. Otherwise join all files on the shared annotation columns first, then drop the unwanted columns.
def single = input_files instanceof Path ? 1 : input_files.size()
def merge = (single == 1) ? 'cat' : 'csvtk join -t -f "Geneid,Start,Length,End,Chr,Strand,gene_name"'
"""
$merge $input_files | csvtk cut -t -f "-Start,-Chr,-End,-Length,-Strand" | sed 's/Aligned.sortedByCoord.out.markDups.bam//g' | sed 's/.sorted.bam//g' | csvtk rename -t -f Geneid -n phenotype_id | csvtk cut -t -f "-gene_name" | gzip -c > merged_gene_counts.tsv.gz
paste -d"\t" $input_files > merged_raw_all.tsv
csvtk cut -t -f 1 merged_raw_all.tsv | \
csvtk rename -t -f Geneid -n phenotype_id > phenotype_ids_column.tsv
csvtk cut -t -F -f "*.sorted.bam" merged_raw_all.tsv | sed 's/.sorted.bam//g' > merged_genes_no_phenotype_id.tsv
paste -d"\t" phenotype_ids_column.tsv merged_genes_no_phenotype_id.tsv | gzip -c > merged_gene_counts.tsv.gz
"""
}

0 comments on commit 0f96d38

Please sign in to comment.