-
Notifications
You must be signed in to change notification settings - Fork 2
/
split_barcode.py
66 lines (60 loc) · 1.95 KB
/
split_barcode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
## Before running this script, it is better to use the split function to split the input file into pieces smaller than 4 GB.
import itertools
import os
############################################################
#def write_file(
# Whitespace-separated table: one header line, then rows whose first two
# columns identify a sample and whose remaining columns are its barcode tags.
BARCODE_FILE = '/fs/lustre/wrk/shni/files/case_control.txt'
# Input reads, stored as consecutive four-line records.
READS_FILE = '/fs/lustre/wrk/shni/files/8q24/8q24_19'
# Root directory for the per-sample output files and obsolete.fastq.
OUTPUT_DIR = '/fs/lustre/wrk/shni/files/8q24'

# The tag is the first TAG_LENGTH characters of a record's second line.
TAG_LENGTH = 5
# Number of leading characters removed from the second and fourth lines of
# every record (the tag plus the one character that follows it).
TRIM_LENGTH = 6


def parse_barcodes(lines):
    """Parse the barcode table and drop every ambiguous sample.

    Args:
        lines: iterable of text lines; the first line is a header and is
            skipped.

    Returns:
        A list of rows (each a list of whitespace-separated fields) in input
        order.  A row is kept only if its identity -- the first two fields --
        occurs exactly once in the table; when an identity is repeated, all
        of its rows are discarded.  Rows with fewer than two fields (for
        example blank lines) are ignored because they carry no identity.
    """
    rows = []
    for line in itertools.islice(lines, 1, None):
        fields = line.split()
        if len(fields) < 2:
            continue
        rows.append(fields)

    # Count identities once so duplicates are found in linear time.
    counts = {}
    for fields in rows:
        identity = tuple(fields[:2])
        counts[identity] = counts.get(identity, 0) + 1

    return [fields for fields in rows if counts[tuple(fields[:2])] == 1]


def split_reads(lines, barcode):
    """Assign every four-line record to the barcode row matching its tag.

    Args:
        lines: iterable of text lines made of consecutive four-line records.
        barcode: rows as returned by ``parse_barcodes``.

    Returns:
        A pair ``(assigned, obsolete)``.  ``assigned[i]`` is the list of
        trimmed records whose tag appears among the tag columns of
        ``barcode[i]``; ``obsolete`` is the list of trimmed records whose tag
        matches no row.  Each trimmed record is a single string.

    Raises:
        ValueError: if the input ends in the middle of a record.
    """
    # Map each tag to the first row listing it, so a tag shared by several
    # rows still goes to the earliest one.
    tag_to_row = {}
    for index, fields in enumerate(barcode):
        for tag in fields[2:]:
            tag_to_row.setdefault(tag, index)

    # Reads are stored per row index rather than per concatenated name, so
    # identities such as ("a", "bc") and ("ab", "c") cannot share a bucket.
    assigned = [[] for _ in barcode]
    obsolete = []

    lines = iter(lines)
    while True:
        record = list(itertools.islice(lines, 4))
        if not record:
            break
        if len(record) < 4:
            raise ValueError(
                'truncated record at end of input: %r' % (record,))

        tag = record[1][:TAG_LENGTH]
        trimmed = (record[0] + record[1][TRIM_LENGTH:]
                   + record[2] + record[3][TRIM_LENGTH:])

        index = tag_to_row.get(tag)
        if index is None:
            obsolete.append(trimmed)
        else:
            assigned[index].append(trimmed)

    return assigned, obsolete


def write_reads(path, reads):
    """Append ``reads`` (an iterable of strings) to the file at ``path``.

    The file is created if it does not exist, even when ``reads`` is empty.
    The parent directory is created first if it is missing, so a run cannot
    fail half-way through writing and leave partially appended output.
    """
    directory = os.path.dirname(path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)
    with open(path, 'a') as handle:
        handle.writelines(reads)


def main():
    """Split READS_FILE into one file per sample listed in BARCODE_FILE."""
    with open(BARCODE_FILE, 'r') as table:
        barcode = parse_barcodes(table)

    with open(READS_FILE, 'r') as reads_file:
        assigned, obsolete = split_reads(reads_file, barcode)
    print('file is over')

    # Nothing is written until the whole input has been read successfully.
    write_reads('%s/obsolete.fastq' % OUTPUT_DIR, obsolete)
    for fields, reads in zip(barcode, assigned):
        path = '%s/%s/%s%s' % (OUTPUT_DIR, fields[0], fields[0], fields[1])
        write_reads(path, reads)


if __name__ == '__main__':
    main()