Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Mdtraj-hdf5 format #108

Merged
merged 33 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
ef39963
added highfive as dependency
agrossfield Jun 21, 2023
ee7f6b9
add test prog to build list
agrossfield Jun 22, 2023
378fcf9
totally non-functional
agrossfield Jun 23, 2023
204fc0e
going to give up on HighFive, storing
agrossfield Jun 23, 2023
19d6cd9
build broken
agrossfield Jun 23, 2023
eb56bbd
Builds under ubuntu...
tromo Jun 24, 2023
557ab99
link against boost::json
agrossfield Jun 26, 2023
e1ec914
find_package boost json
agrossfield Jun 26, 2023
753e03b
parsing works, converting to string sorta
agrossfield Jun 26, 2023
2b00a77
successfully read the topology
agrossfield Jun 27, 2023
76f1936
successfully reads topology!
agrossfield Jun 27, 2023
5900ea9
removed comments
agrossfield Jun 27, 2023
46a90c1
preliminary reading box dimensions
agrossfield Jun 27, 2023
338068c
can read box size for arbitrary frame
agrossfield Jun 27, 2023
0a9abc8
can read individual coordinate frames
agrossfield Jun 27, 2023
59c67ae
preliminary setup for MDTraj class
agrossfield Jun 28, 2023
e95804d
added mdtraj.* to build dependencies
agrossfield Jun 28, 2023
9dbd30e
compiles, so it must be right, all pieces present
agrossfield Jun 28, 2023
86725fd
add a missing exception
agrossfield Jun 28, 2023
fd0148c
mdtraj.i breaks swig, temporarily removed
agrossfield Jun 28, 2023
4db58af
builds, get H5::FileIException on read
agrossfield Jun 28, 2023
4e8a767
success reading topology and bonds
agrossfield Jun 29, 2023
f261f85
compiles, so it must be right
agrossfield Jun 29, 2023
85a04e5
interim commit, reading crashes
agrossfield Jun 29, 2023
bba703c
trajectory reading works
agrossfield Jun 30, 2023
db9062e
MDTraj reads first coords on read
agrossfield Jun 30, 2023
199a4e4
updated createTrajectory
agrossfield Jun 30, 2023
78e8d1f
check if system is periodic
agrossfield Jun 30, 2023
91853f7
fixed logic to recognize end of file
agrossfield Jun 30, 2023
2f0783b
forgot to take out highfive dependency
agrossfield Jun 30, 2023
2b2337d
add hdf5 to conda_build script
agrossfield Jul 10, 2023
157e3f6
replaced alloc on frame read with single alloc
agrossfield Jul 12, 2023
fbea31c
final cleanup before merge -- remove tests
agrossfield Jul 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,27 @@ if(NOT ${HAVE_SIZEOF_ULONG})
endif()

find_package(Boost REQUIRED COMPONENTS
regex program_options)
regex program_options json)

if(Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS})
endif()


find_package(NetCDF REQUIRED)
include_directories(${NetCDF_INCLUDE_DIRS})

set(BLA_VENDOR OpenBLAS)
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

find_package(HDF5 REQUIRED COMPONENTS CXX C HL)

if (HDF5_FOUND)
include_directories(${HDF5_INCLUDE_DIRS})
endif()


find_package(SWIG 4.0 COMPONENTS python)
if(SWIG_FOUND)
include(${SWIG_USE_FILE})
Expand Down
2 changes: 2 additions & 0 deletions Packages/User/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ set(LOOS_USER_TOOLS
simple_model_transform
traj_calc
traj_transform
test_h5
test_h5_2
)

foreach(TOOL ${LOOS_USER_TOOLS})
Expand Down
166 changes: 166 additions & 0 deletions Packages/User/test_h5.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
Testing code for HDF5 support

(c) 2023 Alan Grossfield
Department of Biochemistry and Biophysics
University of Rochester School of Medicine and Dentistry


*/


/*
This file is part of LOOS.

LOOS (Lightweight Object-Oriented Structure library)
Copyright (c) 2011 Tod D. Romo
Department of Biochemistry and Biophysics
School of Medicine & Dentistry, University of Rochester

This package (LOOS) is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation under version 3 of the License.

This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

*/
#include <loos.hpp>
#include <H5Cpp.h>
#include <boost/json.hpp>
//using namespace boost::json;

std::string getTopology(H5::H5File &file) {
H5::DataSet dataset = file.openDataSet("topology");
H5::DataSpace dataspace = dataset.getSpace();
H5::DataType datatype = dataset.getDataType();
H5std_string topology;
dataset.read(topology, datatype, dataspace, dataspace);
return std::string(topology);
}


int main(int argc, char *argv[]) {

std::string filename = argv[1];

// Turn off the auto-printing when failure occurs so that we can
// handle the errors appropriately
H5::Exception::dontPrint();

H5::H5File file(filename, H5F_ACC_RDONLY);
std::string topology_json = getTopology(file);
boost::json::value topology = boost::json::parse(topology_json);

loos::AtomicGroup ag;
uint index = 0;

boost::json::array chains = topology.at("chains").as_array();
for (auto chain: chains) {
boost::json::array residues = chain.at("residues").as_array();
for (auto residue: residues) {
int resnum = residue.at("resSeq").as_int64();
std::string resname = residue.at("name").as_string().data();

boost::json::array atoms = residue.at("atoms").as_array();
for (auto atom: atoms) {
std::string atom_name = atom.at("name").as_string().data();
int id = atom.at("index").as_int64();
id++; // HDF5 is 0 based, LOOS is 1 based
std::string element = atom.at("element").as_string().data();

loos::pAtom pa(new loos::Atom);
pa->name(atom_name);
pa->id(id);
pa->index(index);
pa->resid(resnum);
pa->resname(resname);
pa->PDBelement(element);

ag.append(pa);

index++;
}
}
}

// Add the bonds
// We're assuming the atoms are in order
boost::json::array bonds = topology.at("bonds").as_array();
for (auto bond: bonds) {
int atom1 = bond.at(0).as_int64();
int atom2 = bond.at(1).as_int64();
ag[atom1]->addBond(ag[atom2]);
ag[atom2]->addBond(ag[atom1]);
}

// TODO: I need an example that has constraints
// If there are constraints, we should treat them like bonds

// Read the coordinates and box size
H5::DataSet box_dataset = file.openDataSet("cell_lengths");
H5::DataSpace box_dataspace = box_dataset.getSpace();
H5::DataType box_datatype = box_dataset.getDataType();
hsize_t box_dims[3];
int ndims = box_dataspace.getSimpleExtentDims(box_dims, NULL);
//std::cout << ndims << std::endl;
//std::cout << box_dims[0] << std::endl;
//std::cout << box_dims[1] << std::endl;

//int num_elements = box_dims[0] * box_dims[1];
//auto box_lengths = new float[num_elements];

// TODO: this reads the whole traj at once, but I should learn how to read just
// one frame at a time to do the traj right
//box_dataset.read(box_lengths, box_datatype, box_dataspace, box_dataspace);

// Read the nth frame
hsize_t n = 32;
hsize_t offset[2] = {n, 0};
hsize_t count[2] = {1, box_dims[1]};
float one_box[box_dims[1]];
hsize_t offset_out[1] = {0};
hsize_t count_out[1] = {box_dims[1]};
H5::DataSpace memspace(1, count_out);

box_dataspace.selectHyperslab(H5S_SELECT_SET, count, offset);
box_dataset.read(one_box, box_datatype, memspace, box_dataspace);


// Set the periodic box to the first frame of the traj and fix the units
loos::GCoord box(one_box[0], one_box[1], one_box[2]);
box *= 10.0; // convert to Angstroms
ag.periodicBox(box);

// Read the coordinates for the nth frame
hsize_t frame = 10;
H5::DataSet coord_dataset = file.openDataSet("coordinates");
H5::DataSpace coord_dataspace = coord_dataset.getSpace();
H5::DataType coord_datatype = coord_dataset.getDataType();
hsize_t coord_dims[3];
int ndims_coord = coord_dataspace.getSimpleExtentDims(coord_dims, NULL);
std::cout << ndims_coord << std::endl;
std::cout << coord_dims[0] << "\t"
<< coord_dims[1] << "\t"
<< coord_dims[2] << std::endl;
float one_frame[coord_dims[1]][coord_dims[2]];
hsize_t offset_coord[3] = {frame, 0, 0};
hsize_t count_coord[3] = {1, coord_dims[1], coord_dims[2]};
hsize_t count_coord_out[2] = {coord_dims[1], coord_dims[2]};
H5::DataSpace memspace_coord(2, count_coord_out);
coord_dataspace.selectHyperslab(H5S_SELECT_SET, count_coord, offset_coord);
coord_dataset.read(one_frame, coord_datatype, memspace_coord, coord_dataspace);

std::cout << one_frame[0][0] << "\t" << one_frame[0][1] << "\t" << one_frame[0][2] << std::endl;
std::cout << one_frame[10][0] << "\t" << one_frame[10][1] << "\t" << one_frame[10][2] << std::endl;

//std::cout << box << std::endl;
//std::cout << loos::PDB::fromAtomicGroup(ag) << std::endl;

return 0;
}
73 changes: 73 additions & 0 deletions Packages/User/test_h5_2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
Testing code for HDF5 support

(c) 2023 Alan Grossfield
Department of Biochemistry and Biophysics
University of Rochester School of Medicine and Dentistry


*/


/*
This file is part of LOOS.

LOOS (Lightweight Object-Oriented Structure library)
Copyright (c) 2023 Alan Grossfield
Department of Biochemistry and Biophysics
School of Medicine & Dentistry, University of Rochester

This package (LOOS) is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation under version 3 of the License.

This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

*/
#include <loos.hpp>

using namespace loos;

int main(int argc, char *argv[]) {

std::string filename = argv[1];
MDTraj mdtraj_system(filename);
//std::cerr << "size = " << mdtraj_system.size() << std::endl;
//std::cerr <<" centroids: " << "\t"
// << mdtraj_system.centroid() << std::endl;
AtomicGroup ligand = selectAtoms(mdtraj_system, std::string("resname=='LIG'"));
//std::cerr << "ligand size = " << ligand.size() << std::endl;
//std::cerr << *(ligand[0]) << std::endl;

AtomicGroup system = createSystem(filename);
//std::cerr << "size = " << system.size() << std::endl;
//std::cerr << "centroid from ag = " << system.centroid() << std::endl;

MDTrajTraj traj(filename, system.size());
traj.updateGroupCoords(system);
//std::cerr << "centroid: " << system.centroid() << std::endl;

pTraj traj2 = createTrajectory(filename, system);
traj2->updateGroupCoords(system);
//std::cerr << "centroid: " << system.centroid() << std::endl;
//std::cerr << "box: " << system.periodicBox() << std::endl;

std::cout << "#" << traj2->nframes() << std::endl;
while (traj2->readFrame()) {
traj2->updateGroupCoords(system);
GCoord centroid = system.centroid();
std::cout << traj2->currentFrame() << "\t"
<< centroid.x() << "\t"
<< centroid.y() << "\t"
<< centroid.z() << "\t"
<< std::endl;
}

return 0;
}
3 changes: 1 addition & 2 deletions conda_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ platform=`uname`
echo "Setting channel priority to strict"
conda config --set channel_priority strict

#packages="python=3 swig=4 cmake numpy scipy scikit-learn boost openblas libnetcdf\<4.9 lapack compilers eigen"
packages="python=3 swig=4 cmake numpy scipy scikit-learn boost openblas libnetcdf lapack compilers eigen"
packages="python=3 swig=4 cmake numpy scipy scikit-learn boost openblas libnetcdf lapack compilers eigen hdf5"

env_found=$(conda env list | egrep -v '^#' | egrep "^${envname}[ ]" )
# Build up the conda installation command line
Expand Down
9 changes: 8 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ set(loos_hdr_files
loos.hpp
loos_defs.hpp
loos_timer.hpp
mdtraj.hpp
mdtrajtraj.hpp
pdb.hpp
pdb_remarks.hpp
pdbtraj.hpp
Expand Down Expand Up @@ -126,6 +128,8 @@ set(loos_src_files
ensembles.cpp
gro.cpp
index_range_parser.cpp
mdtraj.cpp
mdtrajtraj.cpp
pdb.cpp
pdb_remarks.cpp
pdbtraj.cpp
Expand Down Expand Up @@ -158,9 +162,12 @@ target_include_directories(loos PUBLIC ${CMAKE_CURRENT_BINARY_DIR})

target_link_libraries(loos Boost::program_options
Boost::regex
Boost::json
NetCDF::NetCDF
BLAS::BLAS
LAPACK::LAPACK)
LAPACK::LAPACK
${HDF5_CXX_LIBRARIES}
)


if(SWIG_FOUND AND BUILD_PYLOOS)
Expand Down
5 changes: 5 additions & 0 deletions src/catch_it.i
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ selectAtoms;
%catches(loos::FileOpenError, loos::FileReadError, loos::ParseError, loos::LOOSError) Gromacs::Gromacs;
%catches(loos::FileOpenError, loos::FileReadError, loos::ParseError, loos::LOOSError) Gromacs::create;

// MDTraj
%catches(loos::FileOpenError, loos::FileReadError, loos::LOOSError) MDTraj::MDTraj;
// TODO: there are some H5 exceptions I should probably catch here
%catches(loos::FileReadError, loos::LOOSError, H5::FileIException) MDTraj::read;


// pdb_remarks

Expand Down
2 changes: 2 additions & 0 deletions src/loos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
#include <pdb.hpp>
#include <psf.hpp>
#include <amber.hpp>
#include <mdtraj.hpp>
#include <tinkerxyz.hpp>

#include <Trajectory.hpp>
Expand All @@ -91,6 +92,7 @@
#include <xtc.hpp>
#include <gro.hpp>
#include <trr.hpp>
#include <mdtrajtraj.hpp>



Expand Down
1 change: 1 addition & 0 deletions src/loos.i
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ namespace loos {
%include "utils.i"
%include "cryst.i"
%include "pdb.i"
//%include "mdtraj.i"
%include "ensembles.i"
%include "Geometry.i"
%include "TimeSeries.i"
Expand Down
1 change: 1 addition & 0 deletions src/loos_defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ namespace loos {
class TinkerXYZ;
class Gromacs;
class CHARMM;
class MDTraj;

typedef boost::shared_ptr<AtomicGroup> pAtomicGroup;
typedef boost::shared_ptr<PDB> pPDB;
Expand Down
Loading
Loading