Skip to content

Commit

Permalink
added aliphatic index, boman index and hydrophobicity to the features
Browse files Browse the repository at this point in the history
  • Loading branch information
yayekit committed Sep 6, 2024
1 parent eeee81b commit c19d645
Showing 1 changed file with 34 additions and 4 deletions.
38 changes: 34 additions & 4 deletions features.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ def extract_features(sequence: Seq) -> Dict[str, float]:
charge = calculate_charge(sequence)
hydrophobic_ratio = sum(amino_acid_count[aa] for aa in 'AILMFPWV') / len(sequence)

# Additional features
aliphatic_index = calculate_aliphatic_index(amino_acid_percent)
boman_index = calculate_boman_index(sequence)
hydrophobicity = calculate_hydrophobicity(sequence)

return {
'length': len(sequence),
'weight': analyser.molecular_weight(),
Expand All @@ -22,9 +27,12 @@ def extract_features(sequence: Seq) -> Dict[str, float]:
'turn_fraction': analyser.secondary_structure_fraction()[1],
'sheet_fraction': analyser.secondary_structure_fraction()[2],
'gravy': analyser.gravy(),
'charge': charge, # New feature
'hydrophobic_ratio': hydrophobic_ratio, # New feature
'flexibility': analyser.flexibility(), # New feature
'charge': charge,
'hydrophobic_ratio': hydrophobic_ratio,
'flexibility': analyser.flexibility(),
'aliphatic_index': aliphatic_index, # New feature
'boman_index': boman_index, # New feature
'hydrophobicity': hydrophobicity, # New feature
**{f'{aa}_percent': percent for aa, percent in amino_acid_percent.items()}
}

Expand All @@ -48,4 +56,26 @@ def compute_conjoint_triad(sequence: str) -> List[int]:
triad = (groups[sequence[i]], groups[sequence[i+1]], groups[sequence[i+2]])
features[triad[0]*49 + triad[1]*7 + triad[2]] += 1

return features
return features

def calculate_aliphatic_index(amino_acid_percent: Dict[str, float]) -> float:
    """Calculate the aliphatic index of a protein sequence.

    Uses Ikai's formula
    ``AI = X(Ala) + 2.9 * X(Val) + 3.9 * (X(Ile) + X(Leu))``
    where every ``X`` is a mole percentage.

    Args:
        amino_acid_percent: Mapping from one-letter residue code to its share
            of the sequence. Assumed to hold fractions in the range 0-1,
            which is why the result is multiplied by 100 -- TODO confirm
            against the caller. Residues missing from the mapping count as 0.

    Returns:
        The aliphatic index on the mole-percent scale.
    """
    ala = amino_acid_percent.get('A', 0)
    val = amino_acid_percent.get('V', 0)
    ile_leu = amino_acid_percent.get('I', 0) + amino_acid_percent.get('L', 0)
    # The fraction-to-percent factor has to scale every term; applying it to
    # alanine alone under-weights Val/Ile/Leu by a factor of 100.
    return 100 * (ala + 2.9 * val + 3.9 * ile_leu)

def calculate_boman_index(sequence: Seq) -> float:
    """Calculate the Boman (Potential Protein Interaction) index.

    The index is the mean per-residue solubility value, where each value is
    the negated cyclohexane-to-water transfer free energy (kcal/mol) from the
    Radzicka & Wolfenden scale.

    Args:
        sequence: Protein sequence; it is converted with ``str()`` and read
            one character per residue. Lookup is case-sensitive (upper-case
            one-letter codes). Characters not in the table contribute 0 to
            the sum but still count towards the length.

    Returns:
        The Boman index, or 0.0 for an empty sequence.
    """
    # Values are already negated, so a plain mean yields the index directly.
    solubility = {
        'L': -4.92, 'I': -4.92, 'V': -4.04, 'F': -2.98, 'M': -2.35,
        'W': -2.33, 'A': -1.81, 'C': -1.28, 'G': -0.94, 'Y': 0.14,
        'T': 2.57, 'S': 3.40, 'H': 4.66, 'Q': 5.54, 'K': 5.55,
        'N': 6.64, 'E': 6.81, 'D': 8.72, 'R': 14.92, 'P': 0,
    }
    residues = str(sequence)
    if not residues:
        # Avoid ZeroDivisionError; an empty sequence has no interaction potential.
        return 0.0
    return sum(solubility.get(aa, 0) for aa in residues) / len(residues)

def calculate_hydrophobicity(sequence: Seq) -> float:
    """Calculate the mean Kyte-Doolittle hydropathy of a protein sequence.

    Args:
        sequence: Protein sequence; it is converted with ``str()`` and read
            one character per residue. Lookup is case-sensitive (upper-case
            one-letter codes). Characters not in the scale contribute 0 to
            the sum but still count towards the length.

    Returns:
        The average hydropathy value, or 0.0 for an empty sequence.

    NOTE(review): the feature dict built by the caller also has a 'gravy'
    entry; confirm the two are not redundant.
    """
    kd_scale = {
        'A': 1.8, 'R': -4.5, 'N': -3.5, 'D': -3.5, 'C': 2.5,
        'Q': -3.5, 'E': -3.5, 'G': -0.4, 'H': -3.2, 'I': 4.5,
        'L': 3.8, 'K': -3.9, 'M': 1.9, 'F': 2.8, 'P': -1.6,
        'S': -0.8, 'T': -0.7, 'W': -0.9, 'Y': -1.3, 'V': 4.2,
    }
    residues = str(sequence)
    if not residues:
        # Avoid ZeroDivisionError on an empty sequence.
        return 0.0
    return sum(kd_scale.get(aa, 0) for aa in residues) / len(residues)

0 comments on commit c19d645

Please sign in to comment.