-
Notifications
You must be signed in to change notification settings - Fork 2
/
Macrocycle_extractor_V2.py
61 lines (49 loc) · 1.81 KB
/
Macrocycle_extractor_V2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from rdkit import Chem
from rdkit.Chem import AllChem
import sys
# Smallest ring size (in atoms) that this script treats as a macrocycle.
MIN_MACROCYCLE_SIZE = 8

# Banner printed at the start of every run.
USAGE = '''
*** USAGE: python Macrocycle_extractor.py input_file.sdf output_file.sdf ***
'''


def find_largest_macrocycle(rings, min_size=MIN_MACROCYCLE_SIZE):
    """Return the largest ring holding at least *min_size* atoms.

    Args:
        rings: iterable of sized ring descriptions (each one a sequence of
            atom indices, e.g. the entries returned by ``Chem.GetSymmSSSR``).
        min_size: minimum number of atoms for a ring to be considered.

    Returns:
        The selected ring object, or ``None`` when no ring is large enough.
        When several rings share the maximal size, the last one wins.
    """
    best = None
    for ring in rings:
        if len(ring) < min_size:
            continue
        # ">=" (not ">") so that ties resolve to the later ring.
        if best is None or len(ring) >= len(best):
            best = ring
    return best


def extract_macrocycle(molecule, min_size=MIN_MACROCYCLE_SIZE):
    """Strip *molecule* down to the atoms of its largest macrocycle.

    Explicit hydrogens are added and the molecule is kekulized (aromatic
    flags cleared) before ring perception. Every atom that is not a member
    of the selected ring is then removed from an editable copy. A short
    report (name, atom counts, ring size) is printed to stdout.

    Args:
        molecule: an RDKit molecule as yielded by ``Chem.SDMolSupplier``.
        min_size: minimum ring size that counts as a macrocycle.

    Returns:
        A ``Chem.RWMol`` containing only the ring atoms, or ``None`` when
        the molecule has no ring of at least *min_size* atoms.
    """
    molecule = Chem.AddHs(molecule)
    Chem.Kekulize(molecule, clearAromaticFlags=True)

    ring = find_largest_macrocycle(Chem.GetSymmSSSR(molecule), min_size)
    if ring is None:
        # Nothing to extract; the caller decides how to report this.
        return None

    ring_atoms = set(ring)
    ring_only = Chem.RWMol(molecule)

    print(molecule.GetProp('_Name') if molecule.HasProp('_Name') else '')
    print('Initial Num Atoms:', ring_only.GetNumAtoms())
    print('Macrocycle length:', len(ring))

    # Remove from the highest index down: deleting an atom only renumbers
    # the atoms above it, so every pending index stays valid.
    for idx in range(ring_only.GetNumAtoms() - 1, -1, -1):
        if idx not in ring_atoms:
            ring_only.RemoveAtom(idx)

    print('Final Num Atoms:', ring_only.GetNumAtoms())
    print('-- -- -- -- -- -- -- -- -- -- --')
    return ring_only


def main(argv):
    """Run the extractor for a command line given as *argv*.

    Args:
        argv: argument list in ``sys.argv`` layout: program name, input SDF
            path, output SDF path.

    Returns:
        Process exit status: 0 on success, 1 when a file cannot be read or
        written, 2 when the required paths are missing.
    """
    print(USAGE)
    if len(argv) < 3:
        sys.stderr.write('error: expected an input and an output SDF path\n')
        return 2
    in_path, out_path = argv[1], argv[2]

    try:
        extracted = []
        for position, molecule in enumerate(Chem.SDMolSupplier(in_path)):
            if molecule is None:
                sys.stderr.write(
                    'warning: record %d could not be parsed; skipped\n'
                    % position)
                continue
            try:
                ring_only = extract_macrocycle(molecule)
            except Exception as exc:
                # One bad record must not abort the whole run, but it must
                # not vanish silently either.
                sys.stderr.write(
                    'warning: record %d failed (%s); skipped\n'
                    % (position, exc))
                continue
            if ring_only is None:
                sys.stderr.write(
                    'warning: record %d has no ring of %d or more atoms; '
                    'skipped\n' % (position, MIN_MACROCYCLE_SIZE))
                continue
            extracted.append(ring_only)

        writer = Chem.SDWriter(out_path)
        try:
            for position, ring_only in enumerate(extracted):
                try:
                    Chem.RemoveStereochemistry(ring_only)
                    # NOTE(review): the original kept a partial SanitizeMol
                    # call disabled at this point; presumably the bare ring
                    # does not sanitize cleanly -- confirm before enabling.
                    writer.write(ring_only)
                except Exception as exc:
                    sys.stderr.write(
                        'warning: extracted structure %d could not be '
                        'written (%s)\n' % (position, exc))
        finally:
            # Flush and release the output file even if a write failed.
            writer.close()
    except (IOError, OSError) as exc:
        sys.stderr.write('error: %s\n' % exc)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))