14 #ifndef MMTF_STRUCTURE_DATA_H 15 #define MMTF_STRUCTURE_DATA_H 25 #include <msgpack.hpp> 34 #define MMTF_SPEC_VERSION_MAJOR 1 35 #define MMTF_SPEC_VERSION_MINOR 1 115 for(
size_t i = 16; i--;) {
228 bool hasConsistentData(
bool verbose=
false, uint32_t chain_name_max_length = 4)
const;
237 std::string
print(std::string delim=
"\t")
const;
250 return !(*
this == c);
255 void copyMapData_(std::map<std::string, msgpack::object>& target,
256 const std::map<std::string, msgpack::object>& source);
265 template <
typename T>
273 template <
typename T>
275 template <
typename T>
280 inline bool isDefaultValue(
const std::map<std::string, msgpack::object>& value);
287 template <
typename T>
298 inline bool is_polymer(
const unsigned int chain_index,
299 const std::vector<Entity>& entity_list);
334 inline bool is_hetatm(
const unsigned int chain_index,
335 const std::vector<Entity>& entity_list,
336 const GroupType& group_type);
348 bool isValidDateFormatOptional(
const std::string& s) {
352 if (s.length() != 10)
return false;
354 if (s[4] !=
'-' || s[7] !=
'-')
return false;
356 std::istringstream is(s);
359 if (is >> y >> dash1 >> m >> dash2 >> d) {
360 return (dash1 ==
'-' && dash2 ==
'-');
368 bool hasRightSizeOptional(
const std::vector<T>& v,
int exp_size) {
373 template<
typename T,
typename Tnum>
374 bool hasValidIndices(
const T* v,
size_t size, Tnum num) {
376 for (
size_t i = 0; i < size; ++i) {
377 if (v[i] < T(0) || v[i] >= tnum)
return false;
381 template<
typename T,
typename Tnum>
382 bool hasValidIndices(
const std::vector<T>& v, Tnum num) {
383 if (v.empty())
return true;
384 else return hasValidIndices(&v[0], v.size(), num);
392 std::stringstream version;
394 return version.str();
398 std::stringstream ss(version_string);
405 template <
typename T>
408 template <
typename T>
410 return (value == getDefaultValue<T>());
412 template <
typename T>
414 return value.empty();
418 return value.empty();
421 inline bool isDefaultValue(
const std::map<std::string, msgpack::object>& value) {
422 return value.empty();
425 template <
typename T>
427 value = getDefaultValue<T>();
433 const std::vector<Entity>& entity_list) {
434 for (std::size_t i = 0; i < entity_list.size(); ++i) {
435 if ( std::find(entity_list[i].chainIndexList.begin(),
436 entity_list[i].chainIndexList.end(),
438 != entity_list[i].chainIndexList.end()) {
439 return ( entity_list[i].type ==
"polymer" 440 || entity_list[i].type ==
"POLYMER");
443 std::stringstream err;
444 err <<
"'is_polymer' unable to find chain_index: " << chain_index
445 <<
" in entity list";
450 const char* hetatm_type[] = {
451 "D-BETA-PEPTIDE, C-GAMMA LINKING",
452 "D-GAMMA-PEPTIDE, C-DELTA LINKING",
453 "D-PEPTIDE COOH CARBOXY TERMINUS",
454 "D-PEPTIDE NH3 AMINO TERMINUS",
457 "D-SACCHARIDE 1,4 AND 1,4 LINKING",
458 "D-SACCHARIDE 1,4 AND 1,6 LINKING",
459 "DNA OH 3 PRIME TERMINUS",
460 "DNA OH 5 PRIME TERMINUS",
464 "L-BETA-PEPTIDE, C-GAMMA LINKING",
465 "L-GAMMA-PEPTIDE, C-DELTA LINKING",
466 "L-PEPTIDE COOH CARBOXY TERMINUS",
467 "L-PEPTIDE NH3 AMINO TERMINUS",
470 "L-SACCHARIDE 1,4 AND 1,4 LINKING",
471 "L-SACCHARIDE 1,4 AND 1,6 LINKING",
472 "RNA OH 3 PRIME TERMINUS",
473 "RNA OH 5 PRIME TERMINUS",
481 for (
int i=0; hetatm_type[i]; ++i) {
482 if (strcmp(type,hetatm_type[i]) == 0)
return true;
488 const std::vector<Entity>& entity_list,
510 mmtfProducer =
"mmtf-cpp library (github.com/rcsb/mmtf-cpp)";
518 if (
this != &obj) copyAllData_(obj);
573 std::vector<int8_t> allowed_bond_orders;
574 allowed_bond_orders.push_back(-1);
575 allowed_bond_orders.push_back(1);
576 allowed_bond_orders.push_back(2);
577 allowed_bond_orders.push_back(3);
578 allowed_bond_orders.push_back(4);
581 if (!hasRightSizeOptional(
unitCell, 6)) {
583 std::cout <<
"inconsistent unitCell (unitCell length != 6)" << std::endl;
590 std::cout <<
"inconsistent depositionDate (does not match 'YYYY-MM-DD' " 591 "or empty)" << std::endl;
597 std::cout <<
"inconsistent releaseDate (does not match 'YYYY-MM-DD' " 598 "or empty)" << std::endl;
606 std::cout <<
"inconsistent ncsOperatorList idx: " << i <<
" found size: " 619 std::cout <<
"inconsistent BioAssemby transform i j: " << i
620 <<
" " << j << std::endl;
626 for (
size_t i = 0; i <
entityList.size(); ++i) {
630 std::cout <<
"inconsistent entity idx: " << i << std::endl;
636 for (
size_t i = 0; i <
groupList.size(); ++i) {
641 std::cout <<
"inconsistent group::atomNameList size at idx: " 648 std::cout <<
"inconsistent group::elementList size at idx: " 656 std::cout <<
"inconsistent group::bondAtomList size: " <<
657 g.
bondAtomList.size() <<
" != group::bondOrderList size(*2): " <<
663 if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
666 std::cout <<
"Cannot have bond order of: " << (int)g.
bondOrderList[j]
667 <<
" allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << j << std::endl;
676 std::cout <<
"Cannot have bondResonanceList without both " <<
677 "bondOrderList and bondAtomList! at idx: " << i << std::endl;
683 std::cout <<
"inconsistent group::bondOrderSize size: " <<
684 g.
bondOrderList.size() <<
" != group::bondResonanceList size: " <<
691 std::cout <<
"inconsistent group::bondAtomList size: " <<
692 g.
bondAtomList.size() <<
" != group::bondResonanceList size(*2): " <<
701 std::cout <<
"group::bondResonanceList had a Resonance of: " 709 std::cout <<
"group::bondResonanceList had a Resonance of: " 711 << (
int)g.
bondOrderList[j] <<
" we require unknown bondOrders to have resonance" 720 std::cout <<
"inconsistent group::bondAtomList indices (not all in [0, " 721 << num_atoms - 1 <<
"]) at idx: " << i << std::endl;
730 std::cout <<
"inconsistent bondAtomList size: " <<
731 bondAtomList.size() <<
" != bondOrderList size(*2): " <<
737 if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
740 std::cout <<
"Cannot have bond order of: " << (int)
bondOrderList[i]
741 <<
" allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << i << std::endl;
750 std::cout <<
"Cannot have bondResonanceList without both " <<
751 "bondOrderList and bondAtomList!" << std::endl;
757 std::cout <<
"inconsistent bondAtomList size: " <<
758 bondAtomList.size() <<
" != bondResonanceList size(*2): " <<
766 std::cout <<
"bondResonanceList had a Resonance of: " 774 std::cout <<
"bondResonanceList had a Resonance of: " 776 << (
int)
bondOrderList[i] <<
" we require unknown bondOrders to have resonance" 785 std::cout <<
"inconsistent bondAtomList indices (not all in [0, " 786 <<
numAtoms - 1 <<
"])" << std::endl;
793 std::cout <<
"inconsistent xCoordList size" << std::endl;
799 std::cout <<
"inconsistent yCoordList size" << std::endl;
805 std::cout <<
"inconsistent zCoordList size" << std::endl;
811 std::cout <<
"inconsistent bFactorList size" << std::endl;
817 std::cout <<
"inconsistent atomIdList size" << std::endl;
823 std::cout <<
"inconsistent altLocList size" << std::endl;
829 std::cout <<
"inconsistent occupancyList size" << std::endl;
835 std::cout <<
"inconsistent groupIdList size" << std::endl;
841 std::cout <<
"inconsistent groupTypeList size" << std::endl;
847 std::cout <<
"inconsistent secStructList size" << std::endl;
853 std::cout <<
"inconsistent insCodeList size" << std::endl;
859 std::cout <<
"inconsistent sequenceIndexList size" << std::endl;
865 std::cout <<
"inconsistent chainIdList size" << std::endl;
871 std::cout <<
"inconsistent chainNameList size" << std::endl;
877 std::cout <<
"inconsistent groupsPerChain size" << std::endl;
883 std::cout <<
"inconsistent chainsPerModel size" << std::endl;
890 std::cout <<
"inconsistent groupTypeList indices (not all in [0, " 891 <<
groupList.size() - 1 <<
"])" << std::endl;
896 std::vector<int32_t> sequenceIndexSize(
numChains);
897 for (
size_t i = 0; i <
entityList.size(); ++i) {
904 int bond_count_from_atom = 0;
905 int bond_count_from_order = 0;
906 int bond_count_from_resonance = 0;
907 bool all_bond_orderLists_are_default =
true;
908 bool all_bond_resonanceLists_are_default =
true;
909 bool all_bond_atomLists_are_default =
true;
911 all_bond_orderLists_are_default =
false;
915 all_bond_resonanceLists_are_default =
false;
919 all_bond_atomLists_are_default =
false;
926 for (
int model_idx = 0; model_idx <
numModels; ++model_idx) {
928 for (
int j = 0; j <
chainsPerModel[model_idx]; ++j, ++chain_idx) {
930 if (
chainIdList[chain_idx].size() > chain_name_max_length) {
932 std::cout <<
"inconsistent chainIdList size at chain_idx: " 933 << chain_idx <<
" size: " 939 &&
chainNameList[chain_idx].size() > chain_name_max_length) {
941 std::cout <<
"inconsistent chainNameList size at chain_idx:" 942 << chain_idx <<
" size: " 948 for (
int k = 0; k <
groupsPerChain[chain_idx]; ++k, ++group_idx) {
953 if (idx < -1 || idx >= sequenceIndexSize[chain_idx]) {
955 std::cout <<
"inconsistent sequenceIndexSize at" 956 " chain_idx: " << chain_idx << std::endl;
966 all_bond_orderLists_are_default =
false;
970 all_bond_resonanceLists_are_default =
false;
974 all_bond_atomLists_are_default =
false;
982 if (!all_bond_orderLists_are_default) {
983 if (bond_count_from_order !=
numBonds) {
985 std::cout <<
"inconsistent numBonds vs bond order count" << std::endl;
990 if (!all_bond_resonanceLists_are_default) {
991 if (bond_count_from_resonance !=
numBonds) {
993 std::cout <<
"inconsistent numBonds vs bond resonance count" << std::endl;
998 if (!all_bond_atomLists_are_default) {
999 if (bond_count_from_atom !=
numBonds) {
1001 std::cout <<
"inconsistent numBonds vs bond atom list count" << std::endl;
1008 std::cout <<
"inconsistent numChains" << std::endl;
1014 std::cout <<
"inconsistent numGroups size" << std::endl;
1020 std::cout <<
"inconsistent numAtoms size" << std::endl;
1029 std::ostringstream out;
1036 for (
int i = 0; i <
numModels; i++, modelIndex++) {
1038 for (
int j = 0; j <
chainsPerModel[modelIndex]; j++, chainIndex++) {
1040 for (
int k = 0; k <
groupsPerChain[chainIndex]; k++, groupIndex++) {
1045 for (
int l = 0; l < groupAtomCount; l++, atomIndex++) {
1048 out <<
"HETATM" << delim;
1050 out <<
"ATOM" << delim;
1053 out << std::setfill(
'0') << std::internal << std::setw(6) <<
1054 std::right <<
atomIdList[atomIndex] << delim;
1055 }
else out <<
"." << delim;
1062 out <<
"." << delim;
1064 }
else out <<
"." << delim;
1072 }
else out <<
"." << delim;
1079 out <<
"." << delim;
1080 else out << int(
insCodeList[groupIndex]) << delim;
1083 out << std::fixed << std::setprecision(3);
1091 }
else out <<
"." << delim;
1095 }
else out <<
"." << delim;
1107 inline void StructureData::copyMapData_(
1108 std::map<std::string, msgpack::object>& target,
1109 const std::map<std::string, msgpack::object>& source) {
1111 std::map<std::string, msgpack::object>::const_iterator it;
1112 for (it = source.begin(); it != source.end(); ++it) {
1114 target[it->first] = tmp_object;
1118 inline void StructureData::copyAllData_(
const StructureData& obj) {
std::vector< int8_t > bondOrderList
Definition: structure_data.hpp:180
std::vector< std::string > elementList
Definition: structure_data.hpp:56
std::vector< int8_t > secStructList
Definition: structure_data.hpp:191
std::vector< std::string > atomNameList
Definition: structure_data.hpp:55
std::map< std::string, msgpack::object > bondProperties
Definition: structure_data.hpp:199
bool isVersionSupported(const std::string &version_string)
Check if version is supported (minor revisions ok, major ones not)
Definition: structure_data.hpp:397
msgpack::zone msgpack_zone
Definition: structure_data.hpp:198
std::vector< char > altLocList
Definition: structure_data.hpp:187
Exception thrown when failing during decoding.
Definition: errors.hpp:23
bool operator!=(const StructureData &c) const
Compare two StructureData classes for inequality.
Definition: structure_data.hpp:249
std::string mmtfVersion
Definition: structure_data.hpp:158
std::map< std::string, msgpack::object > chainProperties
Definition: structure_data.hpp:202
std::vector< std::string > experimentalMethods
Definition: structure_data.hpp:169
int32_t numGroups
Definition: structure_data.hpp:175
std::vector< float > yCoordList
Definition: structure_data.hpp:183
std::vector< int32_t > groupsPerChain
Definition: structure_data.hpp:196
std::vector< char > insCodeList
Definition: structure_data.hpp:192
std::string structureId
Definition: structure_data.hpp:162
std::vector< GroupType > groupList
Definition: structure_data.hpp:178
std::map< std::string, msgpack::object > modelProperties
Definition: structure_data.hpp:203
Top level MMTF data container.
Definition: structure_data.hpp:157
std::vector< int32_t > groupTypeList
Definition: structure_data.hpp:190
std::string type
Definition: structure_data.hpp:86
std::vector< int32_t > groupIdList
Definition: structure_data.hpp:189
bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length=4) const
Check consistency of structural data.
Definition: structure_data.hpp:572
std::map< std::string, msgpack::object > groupProperties
Definition: structure_data.hpp:201
#define MMTF_SPEC_VERSION_MINOR
Definition: structure_data.hpp:35
std::vector< Transform > transformList
Definition: structure_data.hpp:133
std::vector< int32_t > chainsPerModel
Definition: structure_data.hpp:197
std::vector< Entity > entityList
Definition: structure_data.hpp:168
std::string description
Definition: structure_data.hpp:85
std::string groupName
Definition: structure_data.hpp:60
std::vector< int32_t > bondAtomList
Definition: structure_data.hpp:57
bool operator==(const StructureData &c) const
Compare two StructureData classes for equality.
Definition: structure_data.hpp:522
char singleLetterCode
Definition: structure_data.hpp:61
int32_t numChains
Definition: structure_data.hpp:176
std::vector< int32_t > formalChargeList
Definition: structure_data.hpp:54
void setDefaultValue(T &value)
Set default value to given type.
Definition: structure_data.hpp:426
Data store for the biological assembly annotation.
Definition: structure_data.hpp:132
std::vector< int32_t > atomIdList
Definition: structure_data.hpp:186
std::vector< std::string > chainNameList
Definition: structure_data.hpp:195
std::vector< std::vector< float > > ncsOperatorList
Definition: structure_data.hpp:166
float resolution
Definition: structure_data.hpp:170
Group (residue) level data store.
Definition: structure_data.hpp:53
float rFree
Definition: structure_data.hpp:171
int32_t numBonds
Definition: structure_data.hpp:173
std::string print(std::string delim="\t") const
Read out the contents of mmtf::StructureData in a PDB-like fashion. Columns are in order: ATOM/HETATM ...
Definition: structure_data.hpp:1028
std::vector< float > xCoordList
Definition: structure_data.hpp:182
Definition: binary_decoder.hpp:25
std::string chemCompType
Definition: structure_data.hpp:62
MSGPACK_DEFINE_MAP(chainIndexList, description, type, sequence)
std::string sequence
Definition: structure_data.hpp:87
std::vector< float > zCoordList
Definition: structure_data.hpp:184
std::vector< float > unitCell
Definition: structure_data.hpp:160
std::vector< int8_t > bondResonanceList
Definition: structure_data.hpp:181
std::string releaseDate
Definition: structure_data.hpp:165
std::string name
Definition: structure_data.hpp:134
int32_t numModels
Definition: structure_data.hpp:177
std::string depositionDate
Definition: structure_data.hpp:164
std::vector< std::string > chainIdList
Definition: structure_data.hpp:194
#define MMTF_SPEC_VERSION_MAJOR
MMTF spec version which this library implements.
Definition: structure_data.hpp:34
StructureData & operator=(const StructureData &f)
Overload for assignment operator.
Definition: structure_data.hpp:517
bool operator==(BioAssembly const &c) const
Definition: structure_data.hpp:136
T getDefaultValue()
Get default value for given type.
Definition: structure_data.hpp:406
bool is_polymer(const unsigned int chain_index, const std::vector< Entity > &entity_list)
Check if chain is a polymer chain.
Definition: structure_data.hpp:432
int32_t numAtoms
Definition: structure_data.hpp:174
std::vector< BioAssembly > bioAssemblyList
Definition: structure_data.hpp:167
std::vector< int32_t > chainIndexList
Definition: structure_data.hpp:84
std::vector< int8_t > bondResonanceList
Definition: structure_data.hpp:59
float rWork
Definition: structure_data.hpp:172
std::vector< int8_t > bondOrderList
Definition: structure_data.hpp:58
bool isDefaultValue(const T &value)
Definition: structure_data.hpp:409
std::vector< int32_t > sequenceIndexList
Definition: structure_data.hpp:193
bool operator==(GroupType const &c) const
Definition: structure_data.hpp:64
std::string title
Definition: structure_data.hpp:163
std::map< std::string, msgpack::object > atomProperties
Definition: structure_data.hpp:200
std::string getVersionString()
Get string representation of MMTF spec version implemented here.
Definition: structure_data.hpp:391
StructureData()
Construct object with default values set.
Definition: structure_data.hpp:497
Entity type.
Definition: structure_data.hpp:83
bool operator==(Entity const &c) const
Definition: structure_data.hpp:89
MSGPACK_DEFINE_MAP(transformList, name)
std::string spaceGroup
Definition: structure_data.hpp:161
std::string mmtfProducer
Definition: structure_data.hpp:159
std::vector< float > bFactorList
Definition: structure_data.hpp:185
std::vector< int32_t > bondAtomList
Definition: structure_data.hpp:179
std::vector< float > occupancyList
Definition: structure_data.hpp:188
bool is_hetatm(const char *type)
Check if group type consists of HETATM atoms.
Definition: structure_data.hpp:449
std::map< std::string, msgpack::object > extraProperties
Definition: structure_data.hpp:204