Skip to content

Commit

Permalink
Pdbxmmcif-gemmi (#112)
Browse files Browse the repository at this point in the history
* preliminary commit, trivial read program compiles

* specify mmcif format

* I can read the various pieces

* all atom properties except bonds work

* added comments re mass and atomic number

* minimal mmcif implementation

* fixed compilation problem

* basic tests passed

* added setting mass (changed AtomicGroup as well)

* missed whitespace change in merge

* updatery
  • Loading branch information
agrossfield committed Dec 28, 2023
1 parent 5e5c5d2 commit 0dd43ca
Show file tree
Hide file tree
Showing 16 changed files with 300 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ endif()
find_package(NetCDF REQUIRED)
include_directories(${NetCDF_INCLUDE_DIRS})

find_package(gemmi)

set(BLA_VENDOR OpenBLAS)
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)
Expand Down
3 changes: 3 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2023-12-28 Alan Grossfield <alan>
* mmcif/pdbx support

2023-01-14 Alan Grossfield <alan>
* make_library missing from install

Expand Down
2 changes: 2 additions & 0 deletions Packages/User/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ set(LOOS_USER_TOOLS
simple_model_transform
traj_calc
traj_transform
test_mmcif
test_mmcif_2
)

foreach(TOOL ${LOOS_USER_TOOLS})
Expand Down
71 changes: 71 additions & 0 deletions Packages/User/test_mmcif.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@

#include <loos.hpp>
#include <gemmi/mmread.hpp>
#include <gemmi/cif.hpp>
#include <gemmi/mmcif.hpp> // cif::Document -> Structure
#include <gemmi/gz.hpp>

namespace cif = gemmi::cif;


int main(int argc, char *argv[]) {
std::string filename = std::string(argv[1]);

loos::AtomicGroup ag;

auto structure = gemmi::read_structure_file(filename, gemmi::CoorFormat::Mmcif);
auto unit_cell = structure.cell;
auto box = loos::GCoord(unit_cell.a, unit_cell.b, unit_cell.c);
ag.periodicBox(box);

// TODO: hard-wired to read first model, but there should probably be a way to read others
auto model = structure.first_model();
int atom_index = 0;
int residue_number = 1;
for (auto chain:model.chains) {
std::string chain_name = chain.name;
for (auto residue:chain.residues) {
std::string residue_name = residue.name;
auto label_seq = residue.label_seq;
std::string res_entity_id = residue.entity_id;
std::cout << "Residue: "
<< residue_number << "\t"
<< label_seq.str() << "\t"
<< res_entity_id << "\t"
<< residue.name << "\t"
<< residue.seqid.num.value << "\t"
<< std::endl;
for (auto atom:residue.atoms) {
loos::pAtom pa(new loos::Atom);
pa->index(atom.serial);
pa->id(atom_index);
pa->name(atom.name);
pa->PDBelement(atom.element.name());
pa->coords().x(atom.pos.x);
pa->coords().y(atom.pos.y);
pa->coords().z(atom.pos.z);
pa->resid(residue.seqid.num.value);
pa->chainId(chain_name);
// might as well fill in segid, even though it's not official
pa->segid(chain_name);
pa->resname(residue.name);
std::cout << atom.name << "\t"
<< atom.element.name() << "\t"
<< atom.pos.x << "\t"
<< atom.pos.y << "\t"
<< atom.pos.z << std::endl;
// TODO: charge is a char*, looks like it's usually "?" in actual
// mmCIF files. Perhaps a try/catch block to convert to float?

// TODO: since I've got the element, in principle I can look up the
// mass and atomic number, but doing so will require some changes to
// AtomicNumberDeducer
ag.append(pa);

atom_index++;
}
residue_number++;
}
}
std::cout << ag << std::endl;
}
38 changes: 38 additions & 0 deletions Packages/User/test_mmcif_2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

/*
This file is part of LOOS.
LOOS (Lightweight Object-Oriented Structure library)
Copyright (c) 2023 Alan Grossfield
Department of Biochemistry and Biophysics
School of Medicine & Dentistry, University of Rochester
This package (LOOS) is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation under version 3 of the License.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <loos.hpp>

using namespace loos;

int main(int argc, char *argv[]) {

std::string filename = argv[1];
MMCIF mmcif_system(filename);
std::cerr << "size = " << mmcif_system.size() << std::endl;
std::cerr << " centroids: " << "\t"
<< mmcif_system.centroid() << std::endl;
//AtomicGroup ligand = selectAtoms(mmcif_system, std::string("resname=='LIG'"));
//std::cerr << "ligand size = " << ligand.size() << std::endl;
//std::cerr << *(ligand[0]) << std::endl;

AtomicGroup system = createSystem(filename);
std::cerr << "size = " << system.size() << std::endl;
std::cerr << "centroid from ag = " << system.centroid() << std::endl;

return 0;
}
2 changes: 1 addition & 1 deletion conda_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ platform=`uname`
echo "Setting channel priority to strict"
conda config --set channel_priority strict

packages="python=3 swig=4 cmake numpy scipy scikit-learn boost openblas libnetcdf lapack compilers eigen hdf5"
packages="python=3 swig=4 cmake numpy scipy scikit-learn boost openblas libnetcdf lapack compilers eigen hdf5 gemmi"

env_found=$(conda env list | egrep -v '^#' | egrep "^${envname}[ ]" )
# Build up the conda installation command line
Expand Down
13 changes: 13 additions & 0 deletions src/AtomicGroup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,19 @@ namespace loos {
return(n);
}

double AtomicGroup::deduceMassFromAtomicNumber() {
uint n = 0;
for (AtomicGroup::iterator i = begin(); i != end(); ++i)
if ((*i)->checkProperty(Atom::anumbit)) {
double mass = loos::deduceMassFromAtomicNumber((*i)->atomic_number());
if (mass) {
(*i)->mass(mass);
++n;
}
}
return(n);
}


void AtomicGroup::copyCoordinatesWithIndex(const std::vector<GCoord> &coords) {
if (! atoms.empty())
Expand Down
3 changes: 3 additions & 0 deletions src/AtomicGroup.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,9 @@ namespace loos
//! Deduce atomic number from mass (if present), returning number of atoms assigned
uint deduceAtomicNumberFromMass(const double tol = 0.1);

//! Deduce mass from atomic number (if present), returning the number of atoms assigned
double deduceMassFromAtomicNumber();

//! Is the array of atoms already sorted???
/**
* While we make some effort to ensure that alterations to the AtomicGroup
Expand Down
16 changes: 16 additions & 0 deletions src/AtomicNumberDeducer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ namespace loos {
return(0);
}

double AtomicNumberDeducer::deduceMass(const unsigned int number) {
static internal::AtomicNumberDeducer deducer;
std::vector<MassNumber>::iterator i;
for (i = deducer.element_table.begin(); i != deducer.element_table.end(); ++i)
if (i->second == number)
break;
if (i != element_table.end()) {
return(i->first);
}
return(0);
}

std::string AtomicNumberDeducer::deduceName(const double mass,
const double tolerance) {

Expand Down Expand Up @@ -249,4 +261,8 @@ namespace loos {
return deducer.deduceName(mass, tolerance);
}

double deduceMassFromAtomicNumber(const unsigned int number) {
static internal::AtomicNumberDeducer deducer;
return(deducer.deduceMass(number));
}
};
4 changes: 4 additions & 0 deletions src/AtomicNumberDeducer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace loos {

unsigned int deduceFromMass(const double mass, const double tolerance);
std::string deduceName(const double mass, const double tolerance);
double deduceMass(const unsigned int number);

private:
void initialize();
Expand All @@ -35,6 +36,9 @@ namespace loos {
unsigned int deduceAtomicNumberFromMass(const double mass, const double tolerance = 0.1);
std::string deduceElementNameFromMass(const double mass, const double tolerance = 0.1);

//! Deduce a mass from the atomic number
double deduceMassFromAtomicNumber(const unsigned int number);

};


Expand Down
4 changes: 3 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ set(loos_hdr_files
loos_timer.hpp
mdtraj.hpp
mdtrajtraj.hpp
mmcif.hpp
pdb.hpp
pdb_remarks.hpp
pdbtraj.hpp
Expand Down Expand Up @@ -130,6 +131,7 @@ set(loos_src_files
index_range_parser.cpp
mdtraj.cpp
mdtrajtraj.cpp
mmcif.cpp
pdb.cpp
pdb_remarks.cpp
pdbtraj.cpp
Expand Down Expand Up @@ -167,7 +169,7 @@ target_link_libraries(loos Boost::program_options
BLAS::BLAS
LAPACK::LAPACK
${HDF5_CXX_LIBRARIES}
)
gemmi::gemmi_cpp)


if(SWIG_FOUND AND BUILD_PYLOOS)
Expand Down
1 change: 1 addition & 0 deletions src/loos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
#include <amber.hpp>
#include <mdtraj.hpp>
#include <tinkerxyz.hpp>
#include <mmcif.hpp>

#include <Trajectory.hpp>
#include <dcd.hpp>
Expand Down
2 changes: 2 additions & 0 deletions src/loos_defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ namespace loos {
class Gromacs;
class CHARMM;
class MDTraj;
class MMCIF;

typedef boost::shared_ptr<AtomicGroup> pAtomicGroup;
typedef boost::shared_ptr<PDB> pPDB;
Expand All @@ -171,6 +172,7 @@ namespace loos {
typedef boost::shared_ptr<TinkerXYZ> pTinkerXYZ;
typedef boost::shared_ptr<Gromacs> pGromacs;
typedef boost::shared_ptr<CHARMM> pCHARMM;
typedef boost::shared_ptr<MMCIF> pMMCIF;


// Misc
Expand Down
79 changes: 79 additions & 0 deletions src/mmcif.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
This file is part of LOOS.
LOOS (Lightweight Object-Oriented Structure library)
Copyright (c) 2023, Tod D. Romo, Alan Grossfield
Department of Biochemistry and Biophysics
School of Medicine & Dentistry, University of Rochester
This package (LOOS) is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation under version 3 of the License.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <mmcif.hpp>

namespace loos {

MMCIF* MMCIF::clone(void) const {
return(new MMCIF(*this));
}

void MMCIF::read(const std::string& filename) {
auto structure = gemmi::read_structure_file(filename, gemmi::CoorFormat::Mmcif);
auto unit_cell = structure.cell;
auto box = loos::GCoord(unit_cell.a, unit_cell.b, unit_cell.c);

periodicBox(box);

// TODO: hard-wired to read first model, but there should probably be a way to read others
auto model = structure.first_model();
int atom_index = 0;
int residue_number = 1;
for (auto chain:model.chains) {
std::string chain_name = chain.name;
for (auto residue:chain.residues) {
std::string residue_name = residue.name;
auto label_seq = residue.label_seq;
std::string res_entity_id = residue.entity_id;
for (auto atom:residue.atoms) {
loos::pAtom pa(new loos::Atom);
pa->index(atom.serial);
pa->id(atom_index);
pa->name(atom.name);
pa->PDBelement(atom.element.name());
pa->coords().x(atom.pos.x);
pa->coords().y(atom.pos.y);
pa->coords().z(atom.pos.z);
pa->resid(residue.seqid.num.value);
pa->chainId(chain_name);
// might as well fill in segid, even though it's not official
pa->segid(chain_name);
pa->resname(residue.name);
// TODO: charge is a char*, looks like it's usually "?" in actual
// mmCIF files. Perhaps a try/catch block to convert to float?

pa->atomic_number(atom.element.atomic_number());
append(pa);

atom_index++;
}
}
}

// assign the masses
uint n_assigned = deduceMassFromAtomicNumber();
// TODO: need to add bonds
}

}


Loading

0 comments on commit 0dd43ca

Please sign in to comment.