-
Notifications
You must be signed in to change notification settings - Fork 4
/
MALVA
executable file
·126 lines (111 loc) · 3.65 KB
/
MALVA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/bin/bash
SCRIPT_NAME=$(basename $0)
BIN_NAME="malva-geno"
EXECUTABLE="$(dirname $0)/bin/${BIN_NAME}"
KMC_BIN="kmc"
DEFAULT_KSIZE=35
DEFAULT_REF_KSIZE=43
DEFAULT_ERR_RATE=0.001
DEFAULT_FREQ="AF"
DEFAULT_SAMPLES="-"
DEFAULT_MAX_COV=200
DEFAULT_BF_SIZE=4
DEFAULT_MAX_MEM=4
USAGE=$'\nUsage: '"${SCRIPT_NAME}"' [-k KMER-SIZE] [-r REF-KMER-SIZE] [-c MAX-COV] <reference> <variants> <sample>
Arguments:
-h print this help and exit
-k size of the kmers to index (default:'"${DEFAULT_KSIZE}"')
-r size of the reference kmers to index (default:'"${DEFAULT_REF_KSIZE}"')
-e expected sample error rate (default:'"${DEFAULT_ERR_RATE}"')
-s file containing the list of (VCF) samples to consider (default:'"${DEFAULT_SAMPLES}"', i.e. all samples)
-f a priori frequency key in the INFO column of the input VCF (default:'"${DEFAULT_FREQ}"')
-c maximum coverage for variant alleles (default:'"${DEFAULT_MAX_COV}"')
-b bloom filter size in GB (default:'"${DEFAULT_BF_SIZE}"')
-m max amount of RAM in GB - KMC parameter (default:'"${DEFAULT_MAX_MEM}"')
-p strip \"chr\" from sequence names (dafault:false)
-u use uniform a priori probabilities (default:false)
-v output COVS and GTS in INFO column (default: false)
-1 run MALVA in haploid mode (default: false)
Positional arguments:
<reference> reference file in FASTA format
<variants> variants file in VCF format
<sample> sample file in FASTA/FASTQ format
'
reference=""
vcf=""
sample=""
k=${DEFAULT_KSIZE}
refk=${DEFAULT_REF_KSIZE}
erate=${DEFAULT_ERR_RATE}
freq=${DEFAULT_FREQ}
samples=${DEFAULT_SAMPLES}
maxcov=${DEFAULT_MAX_COV}
bfsize=${DEFAULT_BF_SIZE}
maxmem=${DEFAULT_MAX_MEM}
strip_chr=""
uniform=""
verbose=""
haploid=""
while getopts "k:r:e:f:s:c:b:m:hpuv1" flag; do
case "${flag}" in
h) $(>&2 echo "${USAGE}")
exit 0
;;
k) k=${OPTARG}
;;
r) refk=${OPTARG}
;;
e) erate=${OPTARG}
;;
f) freq=${OPTARG}
;;
s) samples=${OPTARG}
;;
c) maxcov=${OPTARG}
;;
b) bfsize=${OPTARG}
;;
m) maxmem=${OPTARG}
;;
p) strip_chr="-p"
;;
u) uniform="-u"
;;
v) verbose="-v"
;;
1) haploid="-1"
;;
esac
done
if [[ $# -lt $((${OPTIND} + 2)) ]]
then
(>&2 echo "ERROR: Wrong number of arguments.")
(>&2 echo "")
(>&2 echo "${USAGE}")
exit 1
fi
reference=${@:$OPTIND:1}
vcf_file=${@:$OPTIND+1:1}
sample=${@:$OPTIND+2:1}
kmc_tmp_dir=${sample}_malva_kmc_${refk}_tmp
kmc_out_prefix=${sample}_malva${refk}.kmercount
mkdir -p ${kmc_tmp_dir}
if [ ! -f ${kmc_out_prefix}.kmc_pre ] && [ ! -f ${kmc_out_prefix}.kmc_suf ]
then
(>&2 echo "[${SCRIPT_NAME}] Running KMC")
${KMC_BIN} -m${maxmem} -k${refk} -t1 -fm ${sample} ${kmc_out_prefix} ${kmc_tmp_dir} &> ${kmc_out_file}.log
else
(>&2 echo "[${SCRIPT_NAME}] Found KMC output")
fi
(>&2 echo "[${SCRIPT_NAME}] Running ${BIN_NAME} index")
if [ ! -f ${vcf_file}."c"${refk}".k"${k}".malvax" ]
then
${EXECUTABLE} index ${strip_chr} ${verbose} ${haploid} ${uniform} -k ${k} -r ${refk} -e ${erate} -f ${freq} -s ${samples} -c ${maxcov} -b ${bfsize} ${reference} ${vcf_file} ${kmc_out_prefix}
else
(>&2 echo "[${SCRIPT_NAME}] Index file exists already. Skipping creation.")
fi
(>&2 echo "[${SCRIPT_NAME}] Running ${BIN_NAME} call")
${EXECUTABLE} call ${strip_chr} ${verbose} ${haploid} ${uniform} -k ${k} -r ${refk} -e ${erate} -f ${freq} -s ${samples} -c ${maxcov} -b ${bfsize} ${reference} ${vcf_file} ${kmc_out_prefix}
(>&2 echo "[${SCRIPT_NAME}] Cleaning up")
rm -rf ${kmc_tmp_dir}
exit 0