From 451f8cc08b4203454f8295eb8ea062230835118d Mon Sep 17 00:00:00 2001 From: Lee-Ping Wang Date: Mon, 15 Jan 2018 18:47:49 -0800 Subject: [PATCH] Rewrite write_pdb() function --- src/molecule.py | 275 ++++++++++++++++++++++-------------------------- 1 file changed, 126 insertions(+), 149 deletions(-) diff --git a/src/molecule.py b/src/molecule.py index 869d235ae..c0e849dbe 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -153,7 +153,8 @@ AllVariableNames = QuantumVariableNames | AtomVariableNames | MetaVariableNames | FrameVariableNames # OrderedDict requires Python 2.7 or higher -import os, sys, re, copy, time +import os, sys, re, copy +from datetime import date import numpy as np from numpy import sin, cos, arcsin, arccos import imp @@ -451,6 +452,18 @@ def format_xyz_coord(element,xyz,tinker=False): else: return "%-5s % 15.10f % 15.10f % 15.10f" % (element,xyz[0],xyz[1],xyz[2]) +def _format_83(f): + """Format a single float into a string of width 8, with ideally 3 decimal + places of precision. If the number is a little too large, we can + gracefully degrade the precision by lopping off some of the decimal + places. If it's much too large, we throw a ValueError""" + if -999.999 < f < 9999.999: + return '%8.3f' % f + if -9999999 < f < 99999999: + return ('%8.3f' % f)[:8] + raise ValueError('coordinate "%s" could not be represented ' + 'in a width-8 field' % f) + def format_gro_coord(resid, resname, aname, seqno, xyz): """ Print a line in accordance with .gro file format, with six decimal points of precision @@ -3872,52 +3885,74 @@ def write_dcd(self, selection, **kwargs): dcd = None def write_pdb(self, selection, **kwargs): - """Save to a PDB. Copied wholesale from MSMBuilder. """ - + standardResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR', # Standard amino acids + 'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL', # Standard amino acids + 'HID', 'HIE', 'HIP', 'ASH', 'GLH', 'TYD', 'CYM', 'CYX', 'LYN', # Some alternate protonation states + 'PTR', 'SEP', 'TPO', 'Y1P', 'S1P', 'T1P', # Phosphorylated amino acids + 'HOH', 'SOL', 'WAT', # Common residue names for water + 'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI'] + # When converting from pdb to xyz in interactive prompt, + # ask user for some PDB-specific info. if sys.stdin.isatty(): self.require('xyzs') self.require_resname() self.require_resid() else: self.require('xyzs','resname','resid') - write_conect = kwargs.pop('write_conect', 1) - + # Create automatic atom names if not present + # in data structure: these are just placeholders. if 'atomname' not in self.Data: count = 0 resid = -1 - ATOMS = [] + atomnames = [] for i in range(self.na): if self.resid[i] != resid: count = 0 count += 1 resid = self.resid[i] - ATOMS.append("%s%i" % (self.elem[i], count)) + atomnames.append("%s%i" % (self.elem[i], count)) + self.atomname = atomnames + # Standardize formatting of atom names. + atomNames = [] + for i, atomname in enumerate(self.atomname): + if len(atomname) < 4 and atomname[:1].isalpha() and len(self.elem[i]) < 2: + atomName = ' '+atomname + elif len(atomname) > 4: + atomName = atomname[:4] + else: + atomName = atomname + atomNames.append(atomName) + # Chain names. Default to 'A' for everything + if 'chain' not in self.Data: + chainNames = ['A' for i in range(self.na)] else: - ATOMS = self.atomname - - CHAIN = self.chain if 'chain' in self.Data else [1 for i in range(self.na)] - RESNAMES = self.resname - RESNUMS = self.resid - + chainNames = [i[0] if len(i)>0 else ' ' for i in self.chain] + # Standardize formatting of residue names. + resNames = [] + for resname in self.resname: + if len(resname) > 3: + resName = resname[:3] + else: + resName = resname + resNames.append(resName) + # Standardize formatting of residue IDs. + resIds = [] + for resid in self.resid: + resIds.append("%4d" % (resid%10000)) + # Standardize record names. + records = [] + for resname in resNames: + if resname in ['HOH', 'SOL', 'WAT']: + records.append("HETATM") + elif resname in standardResidues: + records.append("ATOM ") + else: + records.append("HETATM") + out = [] - if min(RESNUMS) == 0: - RESNUMS = [i+1 for i in RESNUMS] - - """ - CRYST1 line, added by Lee-Ping - COLUMNS TYPE FIELD DEFINITION - --------------------------------------- - 7-15 float a a (Angstroms). - 16-24 float b b (Angstroms). - 25-33 float c c (Angstroms). - 34-40 float alpha alpha (degrees). - 41-47 float beta beta (degrees). - 48-54 float gamma gamma (degrees). - 56-66 string sGroup Space group. - 67-70 int z Z value. - """ - + # Create the PDB header. + out.append("REMARK 1 CREATED WITH FORCEBALANCE %s" % (str(date.today()))) if 'boxes' in self.Data: a = self.boxes[0].a b = self.boxes[0].b @@ -3925,126 +3960,68 @@ def write_pdb(self, selection, **kwargs): alpha = self.boxes[0].alpha beta = self.boxes[0].beta gamma = self.boxes[0].gamma - line=np.chararray(80) - line[:] = ' ' - line[0:6]=np.array(list("CRYST1")) - line=np.array(line,'str') - line[6:15] =np.array(list(("%9.3f"%(a)))) - line[15:24]=np.array(list(("%9.3f"%(b)))) - line[24:33]=np.array(list(("%9.3f"%(c)))) - line[33:40]=np.array(list(("%7.2f"%(alpha)))) - line[40:47]=np.array(list(("%7.2f"%(beta)))) - line[47:54]=np.array(list(("%7.2f"%(gamma)))) - # LPW: Put in a dummy space group, we never use it. - line[55:66]=np.array(list(str("P 21 21 21").rjust(11))) - line[66:70]=np.array(list(str(4).rjust(4))) - out.append(''.join(line.tolist())) - - for I in selection: - XYZ = self.xyzs[I] - Serial = 1 + out.append("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1 " % (a, b, c, alpha, beta, gamma)) + # Write the structures as models. + atomIndices = {} + for sn in range(len(self)): + modelIndex = sn + if len(self) > 1: + out.append("MODEL %4d" % modelIndex) + atomIndex = 1 for i in range(self.na): - """ - ATOM line. - COLUMNS TYPE FIELD DEFINITION - --------------------------------------------- - 7-11 int serial Atom serial number. - 13-16 string name Atom name. - 17 string altLoc Alternate location indicator. - 18-20 (17-21 KAB) string resName Residue name. - 22 string chainID Chain identifier. - 23-26 int resSeq Residue sequence number. - 27 string iCode Code for insertion of residues. - 31-38 float x Orthogonal coordinates for X in - Angstroms. - 39-46 float y Orthogonal coordinates for Y in - Angstroms. - 47-54 float z Orthogonal coordinates for Z in - Angstroms. - 55-60 float occupancy Occupancy. - 61-66 float tempFactor Temperature factor. - 73-76 string segID Segment identifier, left-justified. - 77-78 string element Element symbol, right-justified. - 79-80 string charge Charge on the atom. - """ - line=np.chararray(80) - line[:]=' ' - line[0:4]=np.array(list("ATOM")) - line=np.array(line,'str') - line[6:11]=np.array(list(str(Serial%100000).rjust(5))) - # if Serial < 100000: - # line[6:11]=np.array(list(str(Serial%100000).rjust(5))) - # else: - # line[6:11]=np.array(list(hex(Serial)[2:].rjust(5))) - #Molprobity is picky about atom name centering - if len(str(ATOMS[i]))==3: - line[12:16]=np.array(list(str(ATOMS[i]).rjust(4))) - elif len(str(ATOMS[i]))==2: - line[12:16]=np.array(list(" "+str(ATOMS[i])+" ")) - elif len(str(ATOMS[i]))==1: - line[12:16]=np.array(list(" "+str(ATOMS[i])+" ")) - elif len(str(ATOMS[i]))==4: - line[12:16]=np.array(list(str(ATOMS[i]).center(4))) - else: # QYD: if > 4, reduse atomname to 4 letters - line[12:16]=np.array(list(str(ATOMS[i])[0]+str(ATOMS[i])[-3:])) - if len(str(RESNAMES[i]))==3: - line[17:20]=np.array(list(str(RESNAMES[i]))) - else: - line[17:21]=np.array(list(str(RESNAMES[i]).ljust(4))) - - line[21]=str(CHAIN[i]).rjust(1) - line[22:26]=np.array(list(str(RESNUMS[i]%10000).rjust(4))) - # if RESNUMS[i] < 100000: - # line[22:26]=np.array(list(str(RESNUMS[i]).rjust(4))) - # else: - # line[22:26]=np.array(list(hex(RESNUMS[i])[2:].rjust(4))) - - x=XYZ[i][0] - y=XYZ[i][1] - z=XYZ[i][2] - sx=np.sign(x) - sy=np.sign(y) - sz=np.sign(z) - - line[30:38]=np.array(list(("%8.3f"%(x)))) - line[38:46]=np.array(list(("%8.3f"%(y)))) - line[46:54]=np.array(list(("%8.3f"%(z)))) - if hasattr(self, 'elem'): - line[76:78]=np.array(list("%2s" % self.elem[i])) - - if Serial!=-1: - out.append(''.join(line.tolist())) - Serial += 1 - - if 'terminal' in self.Data and self.terminal[i]: - """ - TER line, added by Lee-Ping - COLUMNS TYPE FIELD DEFINITION - ------------------------------------------- - 7-11 int serial Serial number. - 18-20 string resName Residue name. - 22 string chainID Chain identifier. - 23-26 int resSeq Residue sequence number. - 27 string iCode Insertion code. - """ - line=np.chararray(27) - line[:] = ' ' - line[0:3]=np.array(list("TER")) - line[6:11]=np.array(list(str(Serial%100000).rjust(5))) - if len(str(RESNAMES[i]))==3: - line[17:20]=np.array(list(str(RESNAMES[i]))) - else: - line[17:21]=np.array(list(str(RESNAMES[i]).ljust(4))) - line[21]=str(CHAIN[i]).rjust(1) - line[22:26]=np.array(list(str(RESNUMS[i]%10000).rjust(4))) - out.append(''.join(line.tolist())) - Serial += 1 - out.append('ENDMDL') - if 'bonds' in self.Data and write_conect: - connects = ["CONECT%5i" % (b0+1) + "".join(["%5i" % (b[1]+1) for b in self.bonds if b[0] == b0]) for b0 in sorted(list(set(b[0] for b in self.bonds)))] - out += connects - return out - + recordName = records[i] + atomName = atomNames[i] + resName = resNames[i] + chainName = chainNames[i] + resId = resIds[i] + coords = self.xyzs[sn][i] + symbol = self.elem[i] + atomIndices[i] = atomIndex + line = "%s%5d %-4s %3s %s%4s %s%s%s 1.00 0.00 %2s " % ( + recordName, atomIndex%100000, atomName, resName, chainName, resId, _format_83(coords[0]), + _format_83(coords[1]), _format_83(coords[2]), symbol) + assert len(line) == 80, 'Fixed width overflow detected' + out.append(line) + atomIndex += 1 + if i < (self.na-1) and chainName != chainNames[i+1]: + out.append("TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId)) + atomIndex += 1 + out.append("TER %5d %3s %s%4s" % (atomIndex, resName, chainName, resId)) + if len(self) > 1: + out.append("ENDMDL") + conectBonds = [] + if 'bonds' in self.Data: + for i, j in self.bonds: + if i > j: continue + if self.resname[i] not in standardResidues or self.resname[j] not in standardResidues: + conectBonds.append((i, j)) + elif self.atomname[i] == 'SG' and self.atomname[j] == 'SG' and self.resname[i] == 'CYS' and self.resname[j] == 'CYS': + conectBonds.append((i, j)) + elif self.atomname[i] == 'SG' and self.atomname[j] == 'SG' and self.resname[i] == 'CYX' and self.resname[j] == 'CYX': + conectBonds.append((i, j)) + + atomBonds = {} + for atom1, atom2 in conectBonds: + index1 = atomIndices[atom1] + index2 = atomIndices[atom2] + if index1 not in atomBonds: + atomBonds[index1] = [] + if index2 not in atomBonds: + atomBonds[index2] = [] + atomBonds[index1].append(index2) + atomBonds[index2].append(index1) + + for index1 in sorted(atomBonds): + bonded = atomBonds[index1] + while len(bonded) > 4: + out.append("CONECT%5d%5d%5d%5d" % (index1, bonded[0], bonded[1], bonded[2])) + del bonded[:4] + line = "CONECT%5d" % index1 + for index2 in bonded: + line = "%s%5d" % (line, index2) + out.append(line) + return(out) + def write_qdata(self, selection, **kwargs): """ Text quantum data format. """ #self.require('xyzs','qm_energies','qm_grads')