Skip to content

Commit

Permalink
Rewrite write_pdb() function
Browse files Browse the repository at this point in the history
  • Loading branch information
leeping committed Jan 16, 2018
1 parent 4bdf288 commit 451f8cc
Showing 1 changed file with 126 additions and 149 deletions.
275 changes: 126 additions & 149 deletions src/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@
AllVariableNames = QuantumVariableNames | AtomVariableNames | MetaVariableNames | FrameVariableNames

# OrderedDict requires Python 2.7 or higher
import os, sys, re, copy, time
import os, sys, re, copy
from datetime import date
import numpy as np
from numpy import sin, cos, arcsin, arccos
import imp
Expand Down Expand Up @@ -451,6 +452,18 @@ def format_xyz_coord(element,xyz,tinker=False):
else:
return "%-5s % 15.10f % 15.10f % 15.10f" % (element,xyz[0],xyz[1],xyz[2])

def _format_83(f):
"""Format a single float into a string of width 8, with ideally 3 decimal
places of precision. If the number is a little too large, we can
gracefully degrade the precision by lopping off some of the decimal
places. If it's much too large, we throw a ValueError"""
if -999.999 < f < 9999.999:
return '%8.3f' % f
if -9999999 < f < 99999999:
return ('%8.3f' % f)[:8]
raise ValueError('coordinate "%s" could not be represented '
'in a width-8 field' % f)

def format_gro_coord(resid, resname, aname, seqno, xyz):
""" Print a line in accordance with .gro file format, with six decimal points of precision
Expand Down Expand Up @@ -3872,179 +3885,143 @@ def write_dcd(self, selection, **kwargs):
dcd = None

def write_pdb(self, selection, **kwargs):
"""Save to a PDB. Copied wholesale from MSMBuilder. """

standardResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR', # Standard amino acids
'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL', # Standard amino acids
'HID', 'HIE', 'HIP', 'ASH', 'GLH', 'TYD', 'CYM', 'CYX', 'LYN', # Some alternate protonation states
'PTR', 'SEP', 'TPO', 'Y1P', 'S1P', 'T1P', # Phosphorylated amino acids
'HOH', 'SOL', 'WAT', # Common residue names for water
'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI']
# When converting from pdb to xyz in interactive prompt,
# ask user for some PDB-specific info.
if sys.stdin.isatty():
self.require('xyzs')
self.require_resname()
self.require_resid()
else:
self.require('xyzs','resname','resid')

write_conect = kwargs.pop('write_conect', 1)

# Create automatic atom names if not present
# in data structure: these are just placeholders.
if 'atomname' not in self.Data:
count = 0
resid = -1
ATOMS = []
atomnames = []
for i in range(self.na):
if self.resid[i] != resid:
count = 0
count += 1
resid = self.resid[i]
ATOMS.append("%s%i" % (self.elem[i], count))
atomnames.append("%s%i" % (self.elem[i], count))
self.atomname = atomnames
# Standardize formatting of atom names.
atomNames = []
for i, atomname in enumerate(self.atomname):
if len(atomname) < 4 and atomname[:1].isalpha() and len(self.elem[i]) < 2:
atomName = ' '+atomname
elif len(atomname) > 4:
atomName = atomname[:4]
else:
atomName = atomname
atomNames.append(atomName)
# Chain names. Default to 'A' for everything
if 'chain' not in self.Data:
chainNames = ['A' for i in range(self.na)]
else:
ATOMS = self.atomname

CHAIN = self.chain if 'chain' in self.Data else [1 for i in range(self.na)]
RESNAMES = self.resname
RESNUMS = self.resid

chainNames = [i[0] if len(i)>0 else ' ' for i in self.chain]
# Standardize formatting of residue names.
resNames = []
for resname in self.resname:
if len(resname) > 3:
resName = resname[:3]
else:
resName = resname
resNames.append(resName)
# Standardize formatting of residue IDs.
resIds = []
for resid in self.resid:
resIds.append("%4d" % (resid%10000))
# Standardize record names.
records = []
for resname in resNames:
if resname in ['HOH', 'SOL', 'WAT']:
records.append("HETATM")
elif resname in standardResidues:
records.append("ATOM ")
else:
records.append("HETATM")

out = []
if min(RESNUMS) == 0:
RESNUMS = [i+1 for i in RESNUMS]

"""
CRYST1 line, added by Lee-Ping
COLUMNS TYPE FIELD DEFINITION
---------------------------------------
7-15 float a a (Angstroms).
16-24 float b b (Angstroms).
25-33 float c c (Angstroms).
34-40 float alpha alpha (degrees).
41-47 float beta beta (degrees).
48-54 float gamma gamma (degrees).
56-66 string sGroup Space group.
67-70 int z Z value.
"""

# Create the PDB header.
out.append("REMARK 1 CREATED WITH FORCEBALANCE %s" % (str(date.today())))
if 'boxes' in self.Data:
a = self.boxes[0].a
b = self.boxes[0].b
c = self.boxes[0].c
alpha = self.boxes[0].alpha
beta = self.boxes[0].beta
gamma = self.boxes[0].gamma
line=np.chararray(80)
line[:] = ' '
line[0:6]=np.array(list("CRYST1"))
line=np.array(line,'str')
line[6:15] =np.array(list(("%9.3f"%(a))))
line[15:24]=np.array(list(("%9.3f"%(b))))
line[24:33]=np.array(list(("%9.3f"%(c))))
line[33:40]=np.array(list(("%7.2f"%(alpha))))
line[40:47]=np.array(list(("%7.2f"%(beta))))
line[47:54]=np.array(list(("%7.2f"%(gamma))))
# LPW: Put in a dummy space group, we never use it.
line[55:66]=np.array(list(str("P 21 21 21").rjust(11)))
line[66:70]=np.array(list(str(4).rjust(4)))
out.append(''.join(line.tolist()))

for I in selection:
XYZ = self.xyzs[I]
Serial = 1
out.append("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1 " % (a, b, c, alpha, beta, gamma))
# Write the structures as models.
atomIndices = {}
for sn in range(len(self)):
modelIndex = sn
if len(self) > 1:
out.append("MODEL %4d" % modelIndex)
atomIndex = 1
for i in range(self.na):
"""
ATOM line.
COLUMNS TYPE FIELD DEFINITION
---------------------------------------------
7-11 int serial Atom serial number.
13-16 string name Atom name.
17 string altLoc Alternate location indicator.
18-20 (17-21 KAB) string resName Residue name.
22 string chainID Chain identifier.
23-26 int resSeq Residue sequence number.
27 string iCode Code for insertion of residues.
31-38 float x Orthogonal coordinates for X in
Angstroms.
39-46 float y Orthogonal coordinates for Y in
Angstroms.
47-54 float z Orthogonal coordinates for Z in
Angstroms.
55-60 float occupancy Occupancy.
61-66 float tempFactor Temperature factor.
73-76 string segID Segment identifier, left-justified.
77-78 string element Element symbol, right-justified.
79-80 string charge Charge on the atom.
"""
line=np.chararray(80)
line[:]=' '
line[0:4]=np.array(list("ATOM"))
line=np.array(line,'str')
line[6:11]=np.array(list(str(Serial%100000).rjust(5)))
# if Serial < 100000:
# line[6:11]=np.array(list(str(Serial%100000).rjust(5)))
# else:
# line[6:11]=np.array(list(hex(Serial)[2:].rjust(5)))
#Molprobity is picky about atom name centering
if len(str(ATOMS[i]))==3:
line[12:16]=np.array(list(str(ATOMS[i]).rjust(4)))
elif len(str(ATOMS[i]))==2:
line[12:16]=np.array(list(" "+str(ATOMS[i])+" "))
elif len(str(ATOMS[i]))==1:
line[12:16]=np.array(list(" "+str(ATOMS[i])+" "))
elif len(str(ATOMS[i]))==4:
line[12:16]=np.array(list(str(ATOMS[i]).center(4)))
else: # QYD: if > 4, reduse atomname to 4 letters
line[12:16]=np.array(list(str(ATOMS[i])[0]+str(ATOMS[i])[-3:]))
if len(str(RESNAMES[i]))==3:
line[17:20]=np.array(list(str(RESNAMES[i])))
else:
line[17:21]=np.array(list(str(RESNAMES[i]).ljust(4)))

line[21]=str(CHAIN[i]).rjust(1)
line[22:26]=np.array(list(str(RESNUMS[i]%10000).rjust(4)))
# if RESNUMS[i] < 100000:
# line[22:26]=np.array(list(str(RESNUMS[i]).rjust(4)))
# else:
# line[22:26]=np.array(list(hex(RESNUMS[i])[2:].rjust(4)))

x=XYZ[i][0]
y=XYZ[i][1]
z=XYZ[i][2]
sx=np.sign(x)
sy=np.sign(y)
sz=np.sign(z)

line[30:38]=np.array(list(("%8.3f"%(x))))
line[38:46]=np.array(list(("%8.3f"%(y))))
line[46:54]=np.array(list(("%8.3f"%(z))))
if hasattr(self, 'elem'):
line[76:78]=np.array(list("%2s" % self.elem[i]))

if Serial!=-1:
out.append(''.join(line.tolist()))
Serial += 1

if 'terminal' in self.Data and self.terminal[i]:
"""
TER line, added by Lee-Ping
COLUMNS TYPE FIELD DEFINITION
-------------------------------------------
7-11 int serial Serial number.
18-20 string resName Residue name.
22 string chainID Chain identifier.
23-26 int resSeq Residue sequence number.
27 string iCode Insertion code.
"""
line=np.chararray(27)
line[:] = ' '
line[0:3]=np.array(list("TER"))
line[6:11]=np.array(list(str(Serial%100000).rjust(5)))
if len(str(RESNAMES[i]))==3:
line[17:20]=np.array(list(str(RESNAMES[i])))
else:
line[17:21]=np.array(list(str(RESNAMES[i]).ljust(4)))
line[21]=str(CHAIN[i]).rjust(1)
line[22:26]=np.array(list(str(RESNUMS[i]%10000).rjust(4)))
out.append(''.join(line.tolist()))
Serial += 1
out.append('ENDMDL')
if 'bonds' in self.Data and write_conect:
connects = ["CONECT%5i" % (b0+1) + "".join(["%5i" % (b[1]+1) for b in self.bonds if b[0] == b0]) for b0 in sorted(list(set(b[0] for b in self.bonds)))]
out += connects
return out

recordName = records[i]
atomName = atomNames[i]
resName = resNames[i]
chainName = chainNames[i]
resId = resIds[i]
coords = self.xyzs[sn][i]
symbol = self.elem[i]
atomIndices[i] = atomIndex
line = "%s%5d %-4s %3s %s%4s %s%s%s 1.00 0.00 %2s " % (
recordName, atomIndex%100000, atomName, resName, chainName, resId, _format_83(coords[0]),
_format_83(coords[1]), _format_83(coords[2]), symbol)
assert len(line) == 80, 'Fixed width overflow detected'
out.append(line)
atomIndex += 1
if i < (self.na-1) and chainName != chainNames[i+1]:
out.append("TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId))
atomIndex += 1
out.append("TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId))
if len(self) > 1:
out.append("ENDMDL")
conectBonds = []
if 'bonds' in self.Data:
for i, j in self.bonds:
if i > j: continue
if self.resname[i] not in standardResidues or self.resname[j] not in standardResidues:
conectBonds.append((i, j))
elif self.atomname[i] == 'SG' and self.atomname[j] == 'SG' and self.resname[i] == 'CYS' and self.resname[j] == 'CYS':
conectBonds.append((i, j))
elif self.atomname[i] == 'SG' and self.atomname[j] == 'SG' and self.resname[i] == 'CYX' and self.resname[j] == 'CYX':
conectBonds.append((i, j))

atomBonds = {}
for atom1, atom2 in conectBonds:
index1 = atomIndices[atom1]
index2 = atomIndices[atom2]
if index1 not in atomBonds:
atomBonds[index1] = []
if index2 not in atomBonds:
atomBonds[index2] = []
atomBonds[index1].append(index2)
atomBonds[index2].append(index1)

for index1 in sorted(atomBonds):
bonded = atomBonds[index1]
while len(bonded) > 4:
out.append("CONECT%5d%5d%5d%5d" % (index1, bonded[0], bonded[1], bonded[2]))
del bonded[:4]
line = "CONECT%5d" % index1
for index2 in bonded:
line = "%s%5d" % (line, index2)
out.append(line)
return(out)

def write_qdata(self, selection, **kwargs):
""" Text quantum data format. """
#self.require('xyzs','qm_energies','qm_grads')
Expand Down

0 comments on commit 451f8cc

Please sign in to comment.