forked from hgrandjean/Anti-Fungi-Peptide
-
Notifications
You must be signed in to change notification settings - Fork 0
/
peptide_physical_analysis.py
101 lines (76 loc) · 2.79 KB
/
peptide_physical_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData # Local https://github.com/biopython/biopython/blob/master/Bio/SeqUtils/ProtParamData.py
from Bio.SeqUtils import IsoelectricPoint # Local
from Bio.Seq import Seq
from Bio.Data import IUPACData
from Bio.SeqUtils import molecular_weight
import helixvis
import matplotlib.pyplot as plt
import numpy as np
import pylab as P
import math
from scipy import signal
from scipy.signal import find_peaks
from scipy.fft import fft, fftfreq, fftshift , rfft
multi_fasta = [record for record in SeqIO.parse("positive_db_size.fasta", "fasta")]
space = np.array([])
loc = np.array([])
for seq in multi_fasta :
pa = ProteinAnalysis(str(seq.seq))
print(pa)
print(pa.secondary_structure_fraction()[0])
print(pa.charge_at_pH(7))
print(pa.protein_scale(ProtParamData.kd , 2, edge=1.0))
hydropho = pa.protein_scale(ProtParamData.kd, 2, edge=1.0)
size = len(hydropho)
#autocorrelation of signal
'''
x = np.array(hydropho)
# Mean
mean = np.mean(hydropho)
# Variance
var = np.var(hydropho)
# Normalized data
ndata = hydropho - mean
acorr = np.correlate(ndata, ndata, 'full')[len(ndata)-1:]
acorr = acorr / var / len(ndata)
'''
# get location of hydrophoilic residues
peaks, _ = find_peaks(hydropho, distance=2)
loc=np.append(loc,peaks)
space = np.append(space,np.mean(np.diff(peaks))) #space between hydrophilic residues
#plt.plot(range(len(hydropho)), hydropho ) #
#plt.plot(range(size), acorr )
loc = loc.flatten()
space = space.flatten()
print(loc)
print(space)
plt.hist(space)
#wheel =helixvis.draw_wheel(str(seq.seq))
#plt.title('Auto-correlation of hydrophobicity in the peptides sequences')
plt.show()
'''
for seq in seqs :
print('\n > ',seq)
#code here
seq_ = seq+ "_"* (18 - len(seq))
hydrophobicity = {"A": 0.62, "R": -2.53, "N": -0.78, "D": -0.9, "C": 0.29,
"Q": -0.85, "E": -0.74, "G": 0.48, "H": -0.4, "I": 1.38,
"L": 1.06, "K": -1.5, "M": 0.64, "F": 1.19, "P": 0.12,
"S": -0.18, "T": -0.05, "W": 0.81, "Y": 0.26, "V": 1.08 , "_": 0} #Eisenberg hydrophobicity score consensus computed from alpha helix and beta sheet
P1= np.array( [0, 4, 8 ,12, 16])
P2= np.array( [1, 5, 9,13, 17])
P3= np.array( [2, 6, 10,14])
P4= np.array( [3, 7, 11,15])
F1= np.array([seq_[p] for p in P1])
F2= np.array([seq_[p] for p in P2])
F3= np.array([seq_[p] for p in P3])
F4= np.array([seq_[p] for p in P4])
H1 = sum(hydrophobicity[aa] for aa in F1)
H2 = sum(hydrophobicity[aa] for aa in F2)
H3 = sum(hydrophobicity[aa] for aa in F3)
H4 = sum(hydrophobicity[aa] for aa in F4)
#wheel =helixvis.draw_wheel(seq)
#plt.show()
'''