-
Notifications
You must be signed in to change notification settings - Fork 0
/
hits_abg_joined2.py
88 lines (67 loc) · 2.19 KB
/
hits_abg_joined2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
###!/usr/bin/python
#
#
#
# Nick Gleadall contact: [email protected]
# Kim Brugger contact: [email protected]
import sys
import pprint
import re
import os
pp = pprint.PrettyPrinter(indent=4)  # kept for ad-hoc debugging (pp.pprint(...))

# Only directory entries with this suffix are read; the text before the
# suffix is used as the sample (isolate) name.
HITS_SUFFIX = "_hits.txt"


def _parse_hits_file(path):
    """Read one tab-separated hits file.

    The first line is a header row; the columns 'gene' and 'Percent id' are
    located by name, so column order does not matter.

    Args:
        path: Path of the hits file to read.

    Returns:
        A dict mapping each gene name to the list of its 'Percent id'
        values, formatted with two decimals, in file order.

    Raises:
        KeyError: a data line is present but a required column is missing
            from the header.
        ValueError: a 'Percent id' value is not a number.
    """
    gene_hits = {}
    with open(path, 'r') as fh:
        # Strip the line ending first, otherwise the last column name would
        # carry a trailing newline and never match a lookup by name.
        header_fields = fh.readline().rstrip("\r\n").split("\t")
        # On duplicate column names the right-most column wins.
        field_names = dict((name, idx) for idx, name in enumerate(header_fields))

        for line in fh:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            values = line.rstrip("\r\n").split("\t")
            # Columns are resolved per data line so that an empty or
            # header-only file is accepted and simply yields no hits.
            gene_name = values[field_names['gene']]
            percent_id = "%.2f" % float(values[field_names['Percent id']])
            gene_hits.setdefault(gene_name, []).append(percent_id)
    return gene_hits


def _collect_hits(directory):
    """Parse every '*_hits.txt' file found directly inside `directory`.

    Returns:
        A tuple (hits, genes): `hits` maps sample name -> {gene: [values]},
        `genes` is the set of all gene names seen in any sample.
    """
    hits = {}
    genes = set()
    for entry in os.listdir(directory):
        if not entry.endswith(HITS_SUFFIX):
            continue
        sample_name = entry[:-len(HITS_SUFFIX)]
        # os.listdir() returns bare names, so join with the directory to
        # make this work when `directory` is not the working directory.
        sample_hits = _parse_hits_file(os.path.join(directory, entry))
        hits[sample_name] = sample_hits
        genes.update(sample_hits)
    return hits, genes


def _format_cell(values):
    """Render the hits of one gene in one sample as a single CSV cell."""
    if not values:
        return "NA"
    if len(values) > 1:
        # Several hits for the same gene are shown together in brackets.
        return "[" + " + ".join(values) + " ]"
    return values[0]


def _build_table(hits, genes):
    """Return the CSV lines: a header row, then one row per sample (sorted)."""
    gene_order = sorted(genes)
    lines = [",".join(['Isolate'] + gene_order)]
    for sample_name in sorted(hits):
        sample_hits = hits[sample_name]
        row = [sample_name]
        row.extend(_format_cell(sample_hits.get(gene)) for gene in gene_order)
        lines.append(",".join(row))
    return lines


def main(argv=None):
    """Print a sample-by-gene CSV table of 'Percent id' values to stdout.

    Args:
        argv: Argument list in sys.argv layout; argv[1], when present, is
            the directory to scan. Defaults to sys.argv, and the directory
            defaults to the current one.
    """
    if argv is None:
        argv = sys.argv
    directory = argv[1] if len(argv) > 1 else "."

    hits, genes = _collect_hits(directory)
    for line in _build_table(hits, genes):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line)


if __name__ == "__main__":
    main()