forked from albertwcheng/albert-bioinformatics-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
appendTissueSpecExpressionEntropy.py
executable file
·65 lines (52 loc) · 1.09 KB
/
appendTissueSpecExpressionEntropy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
from math import log
from sys import *
from albertcommon import *
def entropy(values):
    """Return the Shannon entropy, in bits, of a list of weights.

    Each weight is divided by the sum of all weights to obtain a
    proportion p, and the result is -sum(p * log2(p)) over all entries.

    values -- sequence of numeric weights (it is traversed twice: once by
              sum() and once by the loop, so a one-shot iterator will not
              work).

    Returns 0.0 for an empty sequence.
    Raises ZeroDivisionError if the sequence is non-empty but its weights
    sum to zero, and ValueError if a proportion is negative (log of a
    negative number).
    """
    totalwgt = float(sum(values))
    Hg = 0.0
    for wgt in values:
        Ptg = wgt / totalwgt
        if Ptg == 0:
            # By convention 0*log(0) contributes nothing to the entropy
            # (limit of p*log(p) as p -> 0); log(0) itself would raise
            # "math domain error".
            continue
        # print >> stderr,Ptg
        Hg -= Ptg * log(Ptg, 2)
    return Hg
def toFloatList(fields, noise):
    """Return float(f) + noise for every entry of fields that parses.

    fields -- iterable of values to convert (the script passes the string
              cells selected from one row of the matrix).
    noise  -- number added to each successfully converted value.

    Entries that cannot be converted (e.g. "NA", an empty string, None)
    are silently skipped, so the result may be shorter than fields or
    empty.
    """
    values = []
    for f in fields:
        try:
            values.append(float(f) + noise)
        except (TypeError, ValueError):
            # Non-numeric cell: deliberately ignored.  Only conversion
            # errors are swallowed so that KeyboardInterrupt/SystemExit
            # and genuine bugs still propagate.
            pass
    return values
# Offset added to every parsed value before the entropy is computed
# (presumably so that a zero entry never produces a zero proportion, whose
# logarithm is undefined -- confirm with the author).
noise=0.000001

if __name__=='__main__':
    # Usage: <program> matrixFile valcols > outputFileWithHgAppended
    # Copies the matrix to stdout with one extra column: "Hg" on the header
    # line(s) and, on every data line, the entropy of the selected value
    # columns (or "NA" when none of them is numeric).
    programName=argv[0]
    args=argv[1:]
    try:
        matrix,valcols=args
    except ValueError:
        # Wrong number of command-line arguments: show usage and stop.
        print >> stderr,programName,"matrixFile valcols > outputFileWithHgAppended"
        explainColumns(stderr)
        exit()

    fs="\t"
    startRow=2   # first data line (1-based); earlier lines are header lines
    headerRow=1

    # getHeader / getCol0ListFromCol1ListStringAdv / getSubvector are not
    # defined in this file; presumably they come from albertcommon.
    # NOTE(review): valcols appears to be turned from a column-selector
    # string into a list of column indices here -- confirm against
    # albertcommon.
    header,prestarts=getHeader(matrix,headerRow,startRow,fs)
    valcols=getCol0ListFromCol1ListStringAdv(header,valcols)

    fil=open(matrix)
    try:
        lino=0
        for lin in fil:
            lino+=1
            lin=lin.rstrip("\r\n")
            fields=lin.split(fs)
            if lino<startRow:
                # header line: label the appended column
                fields.append("Hg")
            else:
                values=toFloatList(getSubvector(fields,valcols),noise)
                if values:
                    Hg=entropy(values)
                else:
                    # no selected cell could be parsed as a number
                    Hg="NA"
                fields.append(str(Hg))
            print >> stdout,fs.join(fields)
    finally:
        # close the input even if a line fails to process
        fil.close()