-
Notifications
You must be signed in to change notification settings - Fork 2
/
softohard_mask.py
32 lines (25 loc) · 783 Bytes
/
softohard_mask.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
### This script turns a soft-masked genome into a hard-masked genome
### simply by converting every lowercase letter into 'N'.
import os
# Path of the FASTA file that the script opens for reading.
path_file = '/wrk/shni/eel/genomes/hg38_iupac_masked.fa'
# Second path; not referenced anywhere in the visible part of this script.
path_file2 = '/wrk/shni/eel/genomes/test.fa'
def maskline(string):
    """Return one sequence line with every lowercase letter replaced by 'N'.

    The line terminator (``'\\n'``, ``'\\r\\n'`` or none at all, as on the
    last line of a file without a trailing newline) is kept exactly as given,
    so the number of bases on the line never changes. Characters that are not
    lowercase letters (uppercase bases, gaps, digits, ...) are left untouched.

    :param string: a single line of sequence text, with or without its
        trailing line terminator.
    :returns: the hard-masked line, same length as the input.
    """
    # Separate the sequence from its terminator so that masking can never
    # consume a base or rewrite the line ending.
    body = string.rstrip('\r\n')
    ending = string[len(body):]

    # Fast path: no lowercase letter present, nothing to mask.
    if body.isupper():
        return string
    # Fast path: the line is made of lowercase letters only.
    if body.isalpha() and body.islower():
        return 'N' * len(body) + ending
    # General case: mask lowercase letters one by one, keep everything else.
    masked = ''.join(['N' if char.islower() else char for char in body])
    return masked + ending
# Build the output path by inserting '_masked' in front of the input file's
# extension. os.path.splitext splits only at the last dot of the final path
# component, so dots in directory names or additional dots in the file name
# cannot corrupt the result, and a path without extension no longer fails.
root, extension = os.path.splitext(path_file)
masked_path = root + '_masked' + extension

# The with-statement guarantees both files are closed, even when reading or
# writing raises part-way through the genome.
with open(path_file, 'r') as source_file, \
        open(masked_path, 'w') as masked_file:
    for line in source_file:
        if line.startswith('>'):
            # FASTA header: report progress and copy the line unchanged.
            # A parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print('now starts processing %s' % line[1:])
            masked_file.write(line)
        else:
            # Sequence line: hard-mask it before writing.
            masked_file.write(maskline(line))