Skip to content

Commit

Permalink
Merge pull request #11 from michoug/main
Browse files Browse the repository at this point in the history
Replace blast by diamond
  • Loading branch information
hoelzer authored Oct 7, 2023
2 parents 0124694 + 4c156e8 commit 3e3d673
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 3 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,9 @@ build-iPhoneSimulator/

# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
.rvmrc

.nextflow*
work/
conda/
example/*dmnd
results/
31 changes: 31 additions & 0 deletions deprecated/blast_old.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
Run blastp and format output for downstream POCP calculations.
*/
process blast {
label 'blast'
publishDir "${params.output}/blast", mode: 'copy', pattern: "${name}-query-${name2}-db.blast*"

input:
tuple val(name), path(fasta), val(name2), path(fasta2)

output:
tuple env(genome_ids_sorted), path(fasta), file("${name}-query-${name2}-db.blast"), emit: blast
tuple env(genome_ids_sorted), path(fasta), file("${name}-query-${name2}-db.blast.hits"), emit: hits

script:
"""
makeblastdb -in ${fasta2} -dbtype prot #-parse_seqids
blastp -task blastp -num_threads ${task.cpus} -query ${fasta} -db ${fasta2} -evalue ${params.evalue} -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend qlen sstart send evalue bitscore slen" | awk '{if(\$3>${params.seqidentity*100} && \$4>(\$9*${params.alnlength})){print \$0}}' > ${name}-query-${name2}-db.blast
awk '{print \$1}' ${name}-query-${name2}-db.blast | sort | uniq | wc -l | awk '{print \$1}' > ${name}-query-${name2}-db.blast.hits
echo "\t${name}:\tFound \$(cat ${name}-query-${name2}-db.blast.hits) matches with an E value of less than ${params.evalue}, a sequence identity of more than ${params.seqidentity*100}%, and an alignable region of the query protein sequence of more than ${params.alnlength*100}%."
genome_ids_sorted='${name} ${name2}'
genome_ids_sorted=\$(echo \$genome_ids_sorted | xargs -n1 | sort | xargs | sed 's/ /-vs-/g')
#ruby pocp.rb ${fasta} ${fasta2} ${params.evalue} ${params.seqidentity} ${params.alnlength} ./ ${task.cpus}
"""
}

/* Comments:
I removed the -parse_seqids parameter from the makeblastdb command because of an error with fasta IDs that are longer than 50 chars. strange.
*/
6 changes: 6 additions & 0 deletions deprecated/blast_old.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: blast
channels:
- bioconda
dependencies:
- blast=2.9.0

3 changes: 2 additions & 1 deletion envs/blast.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ name: blast
channels:
- bioconda
dependencies:
- blast=2.9.0
- diamond=2.1
- python=3.8

4 changes: 2 additions & 2 deletions modules/blast.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ process blast {

script:
"""
makeblastdb -in ${fasta2} -dbtype prot #-parse_seqids
blastp -task blastp -num_threads ${task.cpus} -query ${fasta} -db ${fasta2} -evalue ${params.evalue} -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend qlen sstart send evalue bitscore slen" | awk '{if(\$3>${params.seqidentity*100} && \$4>(\$9*${params.alnlength})){print \$0}}' > ${name}-query-${name2}-db.blast
diamond makedb --in ${fasta2} -d ${fasta2}.dmnd
diamond blastp --ultra-sensitive -p ${task.cpus} -q ${fasta} -d ${fasta2}.dmnd -e ${params.evalue} --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend qlen sstart send evalue bitscore slen | awk '{if(\$3>${params.seqidentity*100} && \$4>(\$9*${params.alnlength})){print \$0}}' > ${name}-query-${name2}-db.blast
awk '{print \$1}' ${name}-query-${name2}-db.blast | sort | uniq | wc -l | awk '{print \$1}' > ${name}-query-${name2}-db.blast.hits
echo "\t${name}:\tFound \$(cat ${name}-query-${name2}-db.blast.hits) matches with an E value of less than ${params.evalue}, a sequence identity of more than ${params.seqidentity*100}%, and an alignable region of the query protein sequence of more than ${params.alnlength*100}%."
Expand Down

0 comments on commit 3e3d673

Please sign in to comment.