-
Notifications
You must be signed in to change notification settings - Fork 0
/
bash.sh
99 lines (80 loc) · 5.18 KB
/
bash.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash
####################################################################
# One-step script for running all software on different datasets #
####################################################################
## Date: 2022-4-13
## Author: Hongfei Liu
## Contact: [email protected]
##
## The tree structure of working directory
# root (working directory)
# ├── bw2_index: the bowtie2 index constructed based on hg38 reference fasta
# ├── bwa_index: the bwa index constructed based on hg38 reference fasta
# ├── bw_index: the bowtie index constructed based on hg38 reference fasta
# ├── output: results of raw, converted, and annotated circRNA given by software and related logs
# ├──CIRCexplorer2
# ├──CircDBG
# ├──find_circ
# ├──logs: running logs of software
# ...
# ├── ref_circ: required known circRNA information from circBase and circAtlas which can be used as input for simulating positive datasets
# ├── ref_genome: it contains genomic fasta, annotation GTF, and refGene GTF files of hg38 version as well as fasta files of hg19 at chromosome level
# ├── ref_mrna: the directory contains reference mRNA annotation info from NCBI database
# ├── remap: hg19ToHg38.over.chain, the chain file used by liftOver
# ├── se_index: the genomic index of segemehl
# ├── software: all required software involved in this study
# ├── SRA: all simulated and real short-read RNA-seq datasets
# ├── star_index: the genomic index of STAR
# ... (other optional or temporary files generated by software)
## Generate background datasets
# ART version: 2.5.8
working_directory=$HOME # where you can change the working_directory
art="$working_directory/software/circRNA_detection_review-master/simu/art_illumina"
## Simulate one paired-end background dataset per coverage level (5, 10, ..., 30).
## History note: the simulated depth for background datasets was changed
## (-f 100 -> -f 200 on 20210705); the loop below passes the loop value as -f.
for coverage in $(seq 5 5 30)
do
    # The directory created here must be the one the -o prefix and the mv
    # commands below use, otherwise the simulator has nowhere to write.
    background_dir="$working_directory/SRA/background_$coverage"
    mkdir -p "$background_dir"
    # The reference mRNA fasta is addressed through $working_directory so the
    # script no longer depends on being started from inside that directory.
    "$art" -ss HS25 -d background -na -i "$working_directory/ref_mrna/ref_mrna.fa" -o "$background_dir/background" -l 101 -f "$coverage" -p -m 350 -s 10 -sp -rs 20210705 -qs -13 -qs2 -13
    # Rename background1.fq / background2.fq to the
    # background_<coverage>_{1,2}.fastq names that the mixing step reads.
    # "done" is printed only when both renames succeed.
    mv "$background_dir/background1.fq" "$background_dir/background_${coverage}_1.fastq" \
        && mv "$background_dir/background2.fq" "$background_dir/background_${coverage}_2.fastq" \
        && echo "done"
done
## Generate positive datasets
# Simulate one paired-end positive (circRNA) dataset per coverage level
# (5, 10, ..., 30) with CIRIsimulator.pl.
for coverage in $(seq 5 5 30)
do
    # Create the directory that the -1/-2 outputs are written to; the mixing
    # step later reads the same SRA/pos_<coverage>/ location.
    pos_dir="$working_directory/SRA/pos_$coverage"
    mkdir -p "$pos_dir"
    # NOTE(review): the -O path does not contain $coverage, so every iteration
    # targets the same pos_circRNAs.list file; presumably only the last run's
    # list survives. Confirm this is intended.
    perl "$working_directory/software/circRNA_detection_review-master/simu/CIRIsimulator.pl" \
        -1 "$pos_dir/pos_${coverage}_1.fastq" \
        -2 "$pos_dir/pos_${coverage}_2.fastq" \
        -O "$working_directory/SRA/pos_circRNAs.list" \
        -G "$working_directory/ref_genome/hg38.refGene.gtf" \
        -DB "$working_directory/ref_circ/combine_final_gh38_database.txt" \
        -C "$coverage" -LC 0 -R 1 -LR 1 -L 100 -E 10 -I 350 \
        -D "$working_directory/ref_genome/hg38" -CHR1 0 -M 50
done
## Combine positive and background datasets into mixed datasets
# For every coverage level, build the mixed dataset by concatenating the
# background reads followed by the positive reads, separately for mate 1
# and mate 2. All paths are quoted so a working directory containing
# whitespace or glob characters is handled correctly.
for coverage in $(seq 5 5 30)
do
    bg_prefix="$working_directory/SRA/background_$coverage/background_${coverage}"
    pos_prefix="$working_directory/SRA/pos_$coverage/pos_${coverage}"
    mixed_dir="$working_directory/SRA/mixed_$coverage"
    mkdir -p "$mixed_dir"
    for mate in 1 2
    do
        cat "${bg_prefix}_${mate}.fastq" "${pos_prefix}_${mate}.fastq" > "$mixed_dir/mixed_${coverage}_${mate}.fastq"
    done
done
## Get the read lengths of all datasets
# Arguments passed to get_reads_length.py: the SRA directory and the
# readsLen.txt path under the working directory (quoted so paths with
# whitespace stay single arguments).
# NOTE(review): get_reads_length.py is resolved relative to the current
# directory; confirm where the script is expected to be started from.
python get_reads_length.py "$working_directory/SRA" "$working_directory/readsLen.txt"
### Analysis of software on datasets
## Create the log directory
# Logs are written to the directory created here. The redirections below use
# this absolute path, so they work no matter which directory the script is
# started from.
log_dir="$working_directory/output/logs"
mkdir -p "$log_dir"
running_dir=$working_directory/shell_scripts

#######################################
# Start one tool's wrapper script as a background job and record its
# output and timing.
# Globals:   running_dir, working_directory, log_dir (read)
# Arguments: $1 - wrapper script file name inside $running_dir
#            $2 - tag used for the <tag>.log and <tag>.time file names
# Outputs:   stdout+stderr of the wrapper -> $log_dir/<tag>.log
#            report of the `time` keyword -> $log_dir/<tag>.time
# Returns:   immediately; the job keeps running in the background
#######################################
launch_tool() {
    local script=$1
    local tag=$2
    # `time` reports on the stderr of the enclosing group, which is why the
    # .time redirection sits outside the braces while &> captures the tool.
    { time nohup bash "$running_dir/$script" "$working_directory" &>"$log_dir/$tag.log" & } 2>"$log_dir/$tag.time"
}

# All tools are started one after another without waiting, so they run
# concurrently; this section does not wait for them to finish.
launch_tool bash_FC.sh    FC     # find_circ
launch_tool bash_CIRC2.sh CIRC2  # CIRCexplorer2
launch_tool bash_CF.sh    CF     # CircRNAfinder
launch_tool bash_CDBG.sh  CDBG   # CDBG
launch_tool bash_CM.sh    CM     # CircMarker
launch_tool bash_CIRI2.sh CIRI2  # CIRI2
launch_tool bash_DCC.sh   DCC    # DCC
launch_tool bash_MP.sh    MP     # Mapsplice
launch_tool bash_SE.sh    SE     # segemehl
# NOTE(review): the KNIFE wrapper (bash_KF.sh) logs under the "NCL" tag, unlike
# the other tools whose tag matches the script suffix. Kept as-is because later
# steps may read NCL.log / NCL.time; confirm whether "KF" was intended.
launch_tool bash_KF.sh    NCL    # KNIFE
launch_tool bash_UB.sh    UB     # UROBORUS