35 namespace fs = std::filesystem;
43 void atom::atom_impl::moveTo(
const point &p)
45 if (m_symop !=
"1_555")
46 throw std::runtime_error(
"Moving symmetry copy");
51 r.assign(
"Cartn_x", std::format(
"{:.3f}", p.m_x),
false,
false);
52 r.assign(
"Cartn_y", std::format(
"{:.3f}", p.m_y),
false,
false);
53 r.assign(
"Cartn_z", std::format(
"{:.3f}", p.m_z),
false,
false);
55 r.assign(
"Cartn_x", cif::format(
"%.3f", p.m_x).str(),
false,
false);
56 r.assign(
"Cartn_y", cif::format(
"%.3f", p.m_y).str(),
false,
false);
57 r.assign(
"Cartn_z", cif::format(
"%.3f", p.m_z).str(),
false,
false);
64 std::string atom::atom_impl::get_property(std::string_view name)
const 66 return row()[name].as<std::string>();
69 int atom::atom_impl::get_property_int(std::string_view name)
const 72 if (not row()[name].empty())
74 auto s = get_property(name);
76 std::from_chars_result r = std::from_chars(s.data(), s.data() + s.length(), result);
77 if (r.ec != std::errc() and
VERBOSE > 0)
78 std::cerr <<
"Error converting " << s <<
" to number for property " << name << std::endl;
83 float atom::atom_impl::get_property_float(std::string_view name)
const 86 if (not row()[name].empty())
88 auto s = get_property(name);
90 std::from_chars_result r = cif::from_chars(s.data(), s.data() + s.length(), result);
91 if (r.ec != std::errc() and
VERBOSE > 0)
92 std::cerr <<
"Error converting " << s <<
" to number for property " << name << std::endl;
97 void atom::atom_impl::set_property(
const std::string_view name,
const std::string &value)
101 throw std::runtime_error(
"Trying to modify a row that does not exist");
102 r.assign(name, value,
true,
true);
137 int atom::atom_impl::get_charge()
const 139 auto formalCharge = row()[
"pdbx_formal_charge"].as<std::optional<int>>();
141 if (not formalCharge.has_value())
143 auto c = cif::compound_factory::instance().create(get_property(
"label_comp_id"));
145 if (
c !=
nullptr and
c->atoms().size() == 1)
146 formalCharge =
c->atoms().front().charge;
149 return formalCharge.value_or(0);
293 os << atom.get_label_comp_id() <<
' ' << atom.get_label_asym_id() <<
':' << atom.get_label_seq_id() <<
' ' << atom.get_label_atom_id();
295 if (atom.is_alternate())
296 os <<
'(' << atom.get_label_alt_id() <<
')';
297 if (atom.get_auth_asym_id() != atom.get_label_asym_id()
or atom.get_auth_seq_id() !=
std::to_string(atom.get_label_seq_id())
or atom.get_pdb_ins_code().empty() ==
false)
298 os <<
" [" << atom.get_auth_asym_id() <<
':' << atom.get_auth_seq_id() << atom.get_pdb_ins_code() <<
']';
306 residue::residue(structure &structure,
const std::vector<atom> &atoms)
307 : m_structure(&structure)
310 throw std::runtime_error(
"Empty list of atoms");
312 auto &
a = atoms.front();
314 m_compound_id =
a.get_label_comp_id();
315 m_asym_id =
a.get_label_asym_id();
316 m_seq_id =
a.get_label_seq_id();
317 m_auth_asym_id =
a.get_auth_asym_id();
318 m_auth_seq_id =
a.get_auth_seq_id();
319 m_pdb_ins_code =
a.get_pdb_ins_code();
321 for (
auto atom : atoms)
322 m_atoms.push_back(atom);
356 std::string residue::get_entity_id()
const 360 if (not m_atoms.empty())
361 result = m_atoms.front().get_label_entity_id();
362 else if (m_structure !=
nullptr and not m_asym_id.empty())
366 auto &db = m_structure->get_datablock();
367 result = db[
"struct_asym"].find1<std::string>(
"id"_key == m_asym_id,
"entity_id");
373 EntityType residue::entity_type()
const 376 return m_structure->get_entity_type_for_entity_id(get_entity_id());
425 void residue::add_atom(atom &atom)
433 m_atoms.push_back(atom);
436 std::vector<atom> residue::unique_atoms()
const 441 std::vector<atom> result;
442 std::string firstAlt;
444 for (
auto &atom : m_atoms)
446 auto alt = atom.get_label_alt_id();
449 result.push_back(atom);
453 if (firstAlt.empty())
455 else if (alt != firstAlt)
458 std::cerr <<
"skipping alternate atom " << atom << std::endl;
462 result.push_back(atom);
468 std::set<std::string> residue::get_alternate_ids()
const 470 std::set<std::string> result;
472 for (
auto a : m_atoms)
474 auto alt =
a.get_label_alt_id();
482 atom residue::get_atom_by_atom_id(
const std::string &atom_id)
const 486 for (
auto &
a : m_atoms)
488 if (
a.get_label_atom_id() == atom_id)
495 if (not result and
VERBOSE > 1)
496 std::cerr <<
"atom with atom_id " << atom_id <<
" not found in residue " << m_asym_id <<
':' << m_seq_id << std::endl;
503 bool residue::is_entity()
const 505 auto &db = m_structure->get_datablock();
507 auto a1 = db[
"atom_site"].find(key(
"label_asym_id") == m_asym_id);
511 return a1.size() == a2.size();
514 std::tuple<point, float> residue::center_and_radius()
const 516 std::vector<point> pts;
517 for (
auto &
a : m_atoms)
518 pts.push_back(
a.get_location());
525 float d =
static_cast<float>(
distance(pt, center));
530 return std::make_tuple(center, radius);
533 bool residue::has_alternate_atoms()
const 535 return std::find_if(m_atoms.begin(), m_atoms.end(), [](
const atom &atom)
536 {
return atom.is_alternate(); }) != m_atoms.end();
539 std::set<std::string> residue::get_atom_ids()
const 541 std::set<std::string> ids;
542 for (
auto a : m_atoms)
543 ids.insert(
a.get_label_atom_id());
548 std::vector<atom> residue::get_atoms_by_id(
const std::string &atom_id)
const 550 std::vector<atom> atoms;
551 for (
auto a : m_atoms)
553 if (
a.get_label_atom_id() == atom_id)
559 std::ostream &
operator<<(std::ostream &os,
const residue &res)
561 os << res.get_compound_id() <<
' ' << res.get_asym_id() <<
':' << res.get_seq_id();
563 if (res.get_auth_asym_id() != res.get_asym_id()
or res.get_auth_seq_id() !=
std::to_string(res.get_seq_id()))
564 os <<
" [" << res.get_auth_asym_id() <<
':' << res.get_auth_seq_id() <<
']';
572 monomer::monomer(
const polymer &polymer,
size_t index,
int seqID,
const std::string &authSeqID,
const std::string &pdbInsCode,
const std::string &compoundID)
573 : residue(*polymer.get_structure(), compoundID, polymer.get_asym_id(), seqID, polymer.get_auth_asym_id(), authSeqID, pdbInsCode)
574 , m_polymer(&polymer)
579 monomer::monomer(monomer &&rhs)
580 : residue(std::move(rhs))
581 , m_polymer(rhs.m_polymer)
582 , m_index(rhs.m_index)
584 rhs.m_polymer =
nullptr;
587 monomer &monomer::operator=(monomer &&rhs)
589 residue::operator=(std::move(rhs));
590 m_polymer = rhs.m_polymer;
591 rhs.m_polymer =
nullptr;
592 m_index = rhs.m_index;
597 bool monomer::is_first_in_chain()
const 602 bool monomer::is_last_in_chain()
const 604 return m_index + 1 == m_polymer->size();
607 bool monomer::has_alpha()
const 609 return m_index >= 1 and m_index + 2 < m_polymer->size();
612 bool monomer::has_kappa()
const 614 return m_index >= 2 and m_index + 2 < m_polymer->size();
617 float monomer::phi()
const 623 auto &prev = m_polymer->operator[](m_index - 1);
624 if (prev.m_seq_id + 1 == m_seq_id)
631 if (a1 and a2 and a3 and a4)
632 result = dihedral_angle(a1.get_location(), a2.get_location(), a3.get_location(), a4.get_location());
643 if (m_index + 1 < m_polymer->size())
645 auto &next = m_polymer->operator[](m_index + 1);
646 if (m_seq_id + 1 == next.m_seq_id)
653 if (a1 and a2 and a3 and a4)
654 result = dihedral_angle(a1.get_location(), a2.get_location(), a3.get_location(), a4.get_location());
661 float monomer::alpha()
const 667 if (m_index >= 1 and m_index + 2 < m_polymer->size())
669 auto &prev = m_polymer->operator[](m_index - 1);
670 auto &next = m_polymer->operator[](m_index + 1);
671 auto &nextNext = m_polymer->operator[](m_index + 2);
673 result =
static_cast<float>(dihedral_angle(prev.CAlpha().get_location(), CAlpha().get_location(), next.CAlpha().get_location(), nextNext.CAlpha().get_location()));
676 catch (
const std::exception &ex)
679 std::cerr << ex.what() << std::endl;
685 float monomer::kappa()
const 691 if (m_index >= 2 and m_index + 2 < m_polymer->size())
693 auto &prevPrev = m_polymer->operator[](m_index - 2);
694 auto &nextNext = m_polymer->operator[](m_index + 2);
696 if (prevPrev.m_seq_id + 4 == nextNext.m_seq_id)
698 double ckap = cosinus_angle(CAlpha().get_location(), prevPrev.CAlpha().get_location(), nextNext.CAlpha().get_location(), CAlpha().get_location());
699 double skap =
std::sqrt(1 - ckap * ckap);
700 result =
static_cast<float>(std::atan2(skap, ckap) * 180 / kPI);
704 catch (
const std::exception &ex)
707 std::cerr <<
"When trying to calculate kappa for " << m_asym_id <<
':' << m_seq_id <<
": " 708 << ex.what() << std::endl;
714 float monomer::tco()
const 722 auto &prev = m_polymer->operator[](m_index - 1);
723 if (prev.m_seq_id + 1 == m_seq_id)
724 result =
static_cast<float>(cosinus_angle(C().get_location(), O().get_location(), prev.C().get_location(), prev.O().get_location()));
727 catch (
const std::exception &ex)
730 std::cerr <<
"When trying to calculate tco for " << get_asym_id() <<
':' << get_seq_id() <<
": " 731 << ex.what() << std::endl;
737 float monomer::omega()
const 743 if (not is_last_in_chain())
744 result = omega(*
this, m_polymer->operator[](m_index + 1));
746 catch (
const std::exception &ex)
749 std::cerr <<
"When trying to calculate omega for " << get_asym_id() <<
':' << get_seq_id() <<
": " 750 << ex.what() << std::endl;
757 {
"ASP", {
"CG",
"OD1"}},
758 {
"ASN", {
"CG",
"OD1"}},
759 {
"ARG", {
"CG",
"CD",
"NE",
"CZ"}},
760 {
"HIS", {
"CG",
"ND1"}},
761 {
"GLN", {
"CG",
"CD",
"OE1"}},
762 {
"GLU", {
"CG",
"CD",
"OE1"}},
765 {
"LYS", {
"CG",
"CD",
"CE",
"NZ"}},
766 {
"TYR", {
"CG",
"CD1"}},
767 {
"PHE", {
"CG",
"CD1"}},
768 {
"LEU", {
"CG",
"CD1"}},
769 {
"TRP", {
"CG",
"CD1"}},
771 {
"ILE", {
"CG1",
"CD1"}},
772 {
"MET", {
"CG",
"SD",
"CE"}},
773 {
"MSE", {
"CG",
"SE",
"CE"}},
774 {
"PRO", {
"CG",
"CD"}},
777 size_t monomer::nr_of_chis()
const 781 auto i = kChiAtomsMap.find(m_compound_id);
782 if (
i != kChiAtomsMap.end())
783 result =
i->second.size();
788 float monomer::chi(
size_t nr)
const 794 auto i = kChiAtomsMap.find(m_compound_id);
795 if (
i != kChiAtomsMap.end() and nr <
i->second.size())
797 std::vector<std::string> atoms{
"N",
"CA",
"CB"};
799 atoms.insert(atoms.end(),
i->second.begin(),
i->second.end());
802 if (chiral_volume() > 0)
804 if (m_compound_id ==
"LEU")
805 atoms.back() =
"CD2";
806 if (m_compound_id ==
"VAL")
807 atoms.back() =
"CG2";
810 result =
static_cast<float>(dihedral_angle(
811 get_atom_by_atom_id(atoms[nr + 0]).get_location(),
812 get_atom_by_atom_id(atoms[nr + 1]).get_location(),
813 get_atom_by_atom_id(atoms[nr + 2]).get_location(),
814 get_atom_by_atom_id(atoms[nr + 3]).get_location()));
817 catch (
const std::exception &e)
820 std::cerr << e.what() << std::endl;
827 bool monomer::is_cis()
const 831 if (m_index + 1 < m_polymer->size())
833 auto &next = m_polymer->operator[](m_index + 1);
835 result = monomer::is_cis(*
this, next);
841 bool monomer::is_complete()
const 844 for (
auto &
a : m_atoms)
846 if (
a.get_label_atom_id() ==
"CA")
848 else if (
a.get_label_atom_id() ==
"C")
850 else if (
a.get_label_atom_id() ==
"N")
852 else if (
a.get_label_atom_id() ==
"O")
859 bool monomer::has_alternate_backbone_atoms()
const 863 for (
auto &
a : m_atoms)
865 if (not
a.is_alternate())
868 auto atom_id =
a.get_label_atom_id();
869 if (atom_id ==
"CA" or atom_id ==
"C" or atom_id ==
"N" or atom_id ==
"O")
879 float monomer::chiral_volume()
const 883 if (m_compound_id ==
"LEU")
885 auto centre = get_atom_by_atom_id(
"CG");
886 auto atom1 = get_atom_by_atom_id(
"CB");
887 auto atom2 = get_atom_by_atom_id(
"CD1");
888 auto atom3 = get_atom_by_atom_id(
"CD2");
890 result = dot_product(atom1.get_location() - centre.get_location(),
891 cross_product(atom2.get_location() - centre.get_location(), atom3.get_location() - centre.get_location()));
893 else if (m_compound_id ==
"VAL")
895 auto centre = get_atom_by_atom_id(
"CB");
896 auto atom1 = get_atom_by_atom_id(
"CA");
897 auto atom2 = get_atom_by_atom_id(
"CG1");
898 auto atom3 = get_atom_by_atom_id(
"CG2");
900 result = dot_product(atom1.get_location() - centre.get_location(),
901 cross_product(atom2.get_location() - centre.get_location(), atom3.get_location() - centre.get_location()));
907 bool monomer::are_bonded(
const monomer &
a,
const monomer &
b,
float errorMargin)
914 a.get_atom_by_atom_id(
"CA").get_location(),
915 a.get_atom_by_atom_id(
"C").get_location(),
916 b.get_atom_by_atom_id(
"N").get_location(),
917 b.get_atom_by_atom_id(
"CA").get_location()};
919 auto distanceCACA =
distance(atoms[0], atoms[3]);
920 double omega = dihedral_angle(atoms[0], atoms[1], atoms[2], atoms[3]);
923 float maxCACADistance = cis ? 3.0f : 3.8f;
925 result =
std::abs(distanceCACA - maxCACADistance) < errorMargin;
934 float monomer::omega(
const monomer &a,
const monomer &b)
938 auto a1 = a.get_atom_by_atom_id(
"CA");
939 auto a2 = a.get_atom_by_atom_id(
"C");
940 auto a3 = b.get_atom_by_atom_id(
"N");
941 auto a4 = b.get_atom_by_atom_id(
"CA");
943 if (a1 and a2 and a3 and a4)
944 result =
static_cast<float>(dihedral_angle(
953 bool monomer::is_cis(
const monomer &a,
const monomer &b)
955 return std::abs(omega(a, b)) < 30.0f;
961 polymer::polymer(structure &s,
const std::string &entityID,
const std::string &asym_id,
const std::string &auth_asym_id)
962 : m_structure(const_cast<structure *>(&s))
963 , m_entity_id(entityID)
965 , m_auth_asym_id(auth_asym_id)
967 using namespace cif::literals;
969 std::map<size_t, size_t> ix;
971 auto &poly_seq_scheme = s.get_datablock()[
"pdbx_poly_seq_scheme"];
972 reserve(poly_seq_scheme.size());
974 for (
auto r : poly_seq_scheme.find(
"asym_id"_key == asym_id))
977 std::optional<int> pdbSeqNum;
978 std::string compoundID, authSeqID, pdbInsCode;
979 cif::tie(seqID, authSeqID, compoundID, pdbInsCode, pdbSeqNum) = r.get(
"seq_id",
"auth_seq_num",
"mon_id",
"pdb_ins_code",
"pdb_seq_num");
981 if (authSeqID.empty() and pdbSeqNum.has_value())
984 size_t index = size();
987 if (not ix.count(seqID))
990 emplace_back(*
this, index, seqID, authSeqID, pdbInsCode, compoundID);
994 monomer
m{*
this,
index, seqID, authSeqID, pdbInsCode, compoundID};
995 std::cerr <<
"Dropping alternate residue " <<
m << std::endl;
1051 sugar::sugar(branch &branch,
const std::string &compoundID,
1052 const std::string &asym_id,
int authSeqID)
1053 : residue(branch.get_structure(), compoundID, asym_id, 0, asym_id,
std::to_string(authSeqID),
"")
1058 sugar::sugar(sugar &&rhs)
1059 : residue(std::forward<residue>(rhs))
1060 , m_branch(rhs.m_branch)
1065 sugar &sugar::operator=(sugar &&rhs)
1069 residue::operator=(std::forward<residue>(rhs));
1070 m_branch = rhs.m_branch;
1091 std::string sugar::name()
const 1095 if (m_compound_id ==
"MAN")
1096 result +=
"alpha-D-mannopyranose";
1097 else if (m_compound_id ==
"BMA")
1098 result +=
"beta-D-mannopyranose";
1099 else if (m_compound_id ==
"NAG")
1100 result +=
"2-acetamido-2-deoxy-beta-D-glucopyranose";
1101 else if (m_compound_id ==
"NDG")
1102 result +=
"2-acetamido-2-deoxy-alpha-D-glucopyranose";
1103 else if (m_compound_id ==
"FUC")
1104 result +=
"alpha-L-fucopyranose";
1105 else if (m_compound_id ==
"FUL")
1106 result +=
"beta-L-fucopyranose";
1109 auto compound = compound_factory::instance().create(m_compound_id);
1111 result += compound->name();
1113 result += m_compound_id;
1119 cif::mm::atom sugar::add_atom(row_initializer atom_info)
1121 auto &db = m_structure->get_datablock();
1122 auto &atom_site = db[
"atom_site"];
1124 auto atom_id = atom_site.get_unique_id(
"");
1126 atom_info.set_value({
"group_PDB",
"HETATM"});
1127 atom_info.set_value({
"id", atom_id});
1128 atom_info.set_value({
"label_entity_id", m_branch->get_entity_id()});
1129 atom_info.set_value({
"label_asym_id", m_branch->get_asym_id()});
1130 atom_info.set_value({
"label_comp_id", m_compound_id});
1131 atom_info.set_value({
"label_seq_id",
"."});
1132 atom_info.set_value({
"label_alt_id",
"."});
1133 atom_info.set_value({
"auth_asym_id", m_branch->get_asym_id()});
1134 atom_info.set_value({
"auth_comp_id", m_compound_id});
1135 atom_info.set_value({
"auth_seq_id", m_auth_seq_id});
1136 atom_info.set_value({
"occupancy", 1.0, 2});
1137 atom_info.set_value({
"B_iso_or_equiv", 30.0, 2});
1138 atom_info.set_value({
"pdbx_PDB_model_num", 1});
1140 auto row = atom_site.emplace(std::move(atom_info));
1141 auto result = m_structure->emplace_atom(db, row);
1143 residue::add_atom(result);
1148 branch::branch(structure &structure,
const std::string &asym_id,
const std::string &entity_id)
1149 : m_structure(&structure)
1150 , m_asym_id(asym_id)
1151 , m_entity_id(entity_id)
1155 auto &db = structure.get_datablock();
1156 auto &struct_asym = db[
"struct_asym"];
1157 auto &branch_scheme = db[
"pdbx_branch_scheme"];
1158 auto &branch_link = db[
"pdbx_entity_branch_link"];
1160 for (
const auto &asym_entity_id : struct_asym.find<std::string>(
"id"_key == asym_id,
"entity_id"))
1162 for (
const auto &[comp_id, num] : branch_scheme.find<std::string,
int>(
1163 "asym_id"_key == asym_id,
"mon_id",
"pdb_seq_num"))
1165 emplace_back(*
this, comp_id, asym_id, num);
1168 for (
const auto &[num1, num2, atom1, atom2] : branch_link.find<
size_t,
size_t, std::string, std::string>(
1169 "entity_id"_key == asym_entity_id,
"entity_branch_list_num_1",
"entity_branch_list_num_2",
"atom_id_1",
"atom_id_2"))
1174 auto &s1 = at(num1 - 1);
1175 auto &s2 = at(num2 - 1);
1177 s1.set_link(s2.get_atom_by_atom_id(atom2));
1184 void branch::link_atoms()
1190 auto &db = m_structure->get_datablock();
1191 auto &branch_link = db[
"pdbx_entity_branch_link"];
1193 auto entity_id = front().get_entity_id();
1195 for (
const auto &[num1, num2, atom1, atom2] : branch_link.find<
size_t,
size_t, std::string, std::string>(
1196 "entity_id"_key == entity_id,
"entity_branch_list_num_1",
"entity_branch_list_num_2",
"atom_id_1",
"atom_id_2"))
1201 auto &s1 = at(num1 - 1);
1202 auto &s2 = at(num2 - 1);
1204 s1.set_link(s2.get_atom_by_atom_id(atom2));
1209 sugar &branch::get_sugar_by_num(
int nr)
1211 auto i = find_if(begin(), end(), [nr](
const sugar &s) {
return s.num() == nr; });
1213 throw std::out_of_range(
"Sugar with num " +
std::to_string(nr) +
" not found in branch " + m_asym_id);
1218 std::string branch::name()
const 1220 return empty() ?
"" : name(front());
1223 sugar &branch::construct_sugar(
const std::string &compound_id)
1225 auto &db = m_structure->get_datablock();
1227 auto compound = compound_factory::instance().create(compound_id);
1228 if (compound ==
nullptr)
1229 throw std::runtime_error(
"Trying to insert unknown compound " + compound_id +
" (not found in CCD)");
1231 auto &chemComp = db[
"chem_comp"];
1232 auto r = chemComp.find(key(
"id") == compound_id);
1236 {
"id", compound_id},
1237 {
"name", compound->name()},
1238 {
"formula", compound->formula()},
1239 {
"formula_weight", compound->formula_weight()},
1240 {
"type", compound->type()}});
1243 sugar &result = emplace_back(*
this, compound_id, m_asym_id, static_cast<int>(size() + 1));
1245 db[
"pdbx_branch_scheme"].emplace({
1246 {
"asym_id", result.get_asym_id()},
1247 {
"entity_id", result.get_entity_id()},
1248 {
"num", result.num()},
1249 {
"mon_id", result.get_compound_id()},
1251 {
"pdb_asym_id", result.get_asym_id()},
1252 {
"pdb_seq_num", result.num()},
1253 {
"pdb_mon_id", result.get_compound_id()},
1255 {
"auth_asym_id", result.get_auth_asym_id()},
1256 {
"auth_mon_id", result.get_compound_id()},
1257 {
"auth_seq_num", result.get_auth_seq_id()},
1265 sugar &branch::construct_sugar(
const std::string &compound_id,
const std::string &atom_id,
1266 int linked_sugar_nr,
const std::string &linked_atom_id)
1268 auto &result = construct_sugar(compound_id);
1270 auto &linked = get_sugar_by_num(linked_sugar_nr);
1271 result.set_link(linked.get_atom_by_atom_id(linked_atom_id));
1273 auto &db = m_structure->get_datablock();
1275 auto &pdbx_entity_branch_link = db[
"pdbx_entity_branch_link"];
1276 auto linkID = pdbx_entity_branch_link.get_unique_id(
"");
1278 db[
"pdbx_entity_branch_link"].emplace({
1279 {
"link_id", linkID },
1280 {
"entity_id", get_entity_id() },
1281 {
"entity_branch_list_num_1", result.num() },
1282 {
"comp_id_1", compound_id },
1283 {
"atom_id_1", atom_id },
1284 {
"leaving_atom_id_1",
"O1" },
1285 {
"entity_branch_list_num_2", linked.num() },
1286 {
"comp_id_2", linked.get_compound_id() },
1287 {
"atom_id_2", linked_atom_id },
1288 {
"leaving_atom_id_2",
"." },
1289 {
"value_order",
"sing" }
1295 std::string branch::name(
const sugar &s)
const 1301 for (
auto &sn : *
this)
1303 if (not sn.get_link()
or sn.get_link().get_auth_seq_id() != s.get_auth_seq_id())
1306 auto n = name(sn) +
"-(1-" + sn.get_link().get_label_atom_id().substr(1) +
')';
1308 result = result.empty() ?
n : result +
"-[" +
n +
']';
1311 if (not result.empty() and result.back() !=
']')
1314 return result + s.name();
1317 float branch::weight()
const 1319 return std::accumulate(begin(), end(), 0.
f, [](
float sum,
const sugar &s)
1321 auto compound = compound_factory::instance().create(s.get_compound_id());
1323 sum += compound->formula_weight();
1330 structure::structure(file &p,
size_t modelNr, StructureOpenOptions options)
1331 : structure(p.front(), modelNr, options)
1335 structure::structure(datablock &db,
size_t modelNr, StructureOpenOptions options)
1337 , m_model_nr(modelNr)
1339 auto &atomCat = db[
"atom_site"];
1341 load_atoms_for_model(options);
1344 if (m_atoms.empty() and m_model_nr == 1)
1346 std::optional<size_t> model_nr;
1347 cif::tie(model_nr) = atomCat.front().get(
"pdbx_PDB_model_num");
1348 if (model_nr and *model_nr != m_model_nr)
1351 std::cerr <<
"No atoms loaded for model 1, trying model " << *model_nr << std::endl;
1352 m_model_nr = *model_nr;
1353 load_atoms_for_model(options);
1357 if (m_atoms.empty())
1360 std::cerr <<
"Warning: no atoms loaded" << std::endl;
1366 void structure::load_atoms_for_model(StructureOpenOptions options)
1368 auto &atomCat = m_db[
"atom_site"];
1370 for (
const auto &a : atomCat)
1372 std::string id, type_symbol;
1373 std::optional<size_t> model_nr;
1375 cif::tie(
id, type_symbol, model_nr) = a.get(
"id",
"type_symbol",
"pdbx_PDB_model_num");
1377 if (model_nr and *model_nr != m_model_nr)
1380 if ((options bitand StructureOpenOptions::SkipHydrogen) and (type_symbol ==
"H" or type_symbol ==
"D"))
1383 emplace_atom(std::make_shared<atom::atom_impl>(m_db,
id));
1402 void structure::load_data()
1404 auto &polySeqScheme = m_db[
"pdbx_poly_seq_scheme"];
1406 for (
const auto &[asym_id, auth_asym_id, entityID] : polySeqScheme.rows<std::string,std::string,std::string>(
"asym_id",
"pdb_strand_id",
"entity_id"))
1408 if (m_polymers.empty()
or m_polymers.back().get_asym_id() != asym_id
or m_polymers.back().get_entity_id() != entityID)
1409 m_polymers.emplace_back(*
this, entityID, asym_id, auth_asym_id);
1412 auto &branchScheme = m_db[
"pdbx_branch_scheme"];
1414 for (
const auto &[asym_id, entity_id] : branchScheme.rows<std::string,std::string>(
"asym_id",
"entity_id"))
1416 if (m_branches.empty()
or m_branches.back().get_asym_id() != asym_id)
1417 m_branches.emplace_back(*
this, asym_id, entity_id);
1420 auto &nonPolyScheme = m_db[
"pdbx_nonpoly_scheme"];
1422 for (
const auto&[asym_id, monID, pdbStrandID, pdbSeqNum, pdbInsCode] :
1423 nonPolyScheme.rows<std::string,std::string,std::string,std::string,std::string>(
"asym_id",
"mon_id",
"pdb_strand_id",
"pdb_seq_num",
"pdb_ins_code"))
1424 m_non_polymers.emplace_back(*
this, monID, asym_id, 0, pdbStrandID, pdbSeqNum, pdbInsCode);
1428 using key_type = std::tuple<std::string, int, std::string>;
1429 std::map<key_type, residue *> resMap;
1431 for (
auto &poly : m_polymers)
1433 for (
auto &res : poly)
1434 resMap[{res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id()}] = &res;
1437 for (
auto &res : m_non_polymers)
1438 resMap[{res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id()}] = &res;
1440 std::set<std::string> sugars;
1441 for (
auto &branch : m_branches)
1443 for (
auto &sugar : branch)
1445 resMap[{sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_auth_seq_id()}] = &sugar;
1446 sugars.insert(sugar.get_compound_id());
1450 for (
auto &atom : m_atoms)
1452 key_type
k(atom.get_label_asym_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
1453 auto ri = resMap.find(
k);
1455 if (ri == resMap.end())
1458 std::cerr <<
"Missing residue for atom " << atom << std::endl;
1461 for (
auto &res : m_non_polymers)
1463 if (res.get_asym_id() != atom.get_label_asym_id())
1473 ri->second->add_atom(atom);
1477 m_branches.erase(std::remove_if(m_branches.begin(), m_branches.end(), [](
const branch &
b) {
return b.empty(); }), m_branches.end());
1479 for (
auto &branch : m_branches)
1480 branch.link_atoms();
1483 EntityType structure::get_entity_type_for_entity_id(
const std::string entityID)
const 1487 auto &entity = m_db[
"entity"];
1488 auto entity_type = entity.find1<std::string>(
"id"_key == entityID,
"type");
1492 if (
iequals(entity_type,
"polymer"))
1493 result = EntityType::polymer;
1494 else if (
iequals(entity_type,
"non-polymer"))
1495 result = EntityType::NonPolymer;
1496 else if (
iequals(entity_type,
"macrolide"))
1497 result = EntityType::Macrolide;
1498 else if (
iequals(entity_type,
"water"))
1499 result = EntityType::Water;
1500 else if (
iequals(entity_type,
"branched"))
1501 result = EntityType::Branched;
1503 throw std::runtime_error(
"Unknown entity type " + entity_type);
1508 EntityType structure::get_entity_type_for_asym_id(
const std::string asym_id)
const 1512 auto &struct_asym = m_db[
"struct_asym"];
1513 auto entityID = struct_asym.find1<std::string>(
"id"_key == asym_id,
"entity_id");
1515 return get_entity_type_for_entity_id(entityID);
1542 atom structure::get_atom_by_id(
const std::string &
id)
const 1544 assert(m_atoms.size() == m_atom_index.size());
1546 int L = 0, R =
static_cast<int>(m_atoms.size() - 1);
1549 int i = (L + R) / 2;
1551 const atom &atom = m_atoms[m_atom_index[
i]];
1553 int d = atom.id().compare(
id);
1564 throw std::out_of_range(
"Could not find atom with id " +
id);
1567 atom structure::get_atom_by_label(
const std::string &atom_id,
const std::string &asym_id,
const std::string &compID,
int seqID,
const std::string &altID)
1569 for (
auto &a : m_atoms)
1571 if (a.get_label_atom_id() == atom_id and
1572 a.get_label_asym_id() == asym_id and
1573 a.get_label_comp_id() == compID and
1574 a.get_label_seq_id() == seqID and
1575 a.get_label_alt_id() == altID)
1581 throw std::out_of_range(
"Could not find atom with specified label");
1584 atom structure::get_atom_by_position(
point p)
const 1589 for (
size_t i = 0;
i < m_atoms.size(); ++
i)
1591 auto &a = m_atoms.at(
i);
1601 if (index < m_atoms.size())
1602 return m_atoms.at(index);
1607 atom structure::get_atom_by_position_and_type(
point p, std::string_view
type, std::string_view res_type)
const 1612 for (
size_t i = 0;
i < m_atoms.size(); ++
i)
1614 auto &a = m_atoms.at(
i);
1616 if (a.get_label_comp_id() != res_type)
1619 if (a.get_label_atom_id() !=
type)
1630 if (index < m_atoms.size())
1631 return m_atoms.at(index);
1636 polymer &structure::get_polymer_by_asym_id(
const std::string &asym_id)
1638 for (
auto &poly : m_polymers)
1640 if (poly.get_asym_id() != asym_id)
1646 throw std::runtime_error(
"polymer with asym id " + asym_id +
" not found");
1649 residue &structure::create_residue(
const std::vector<atom> &atoms)
1651 return m_non_polymers.emplace_back(*
this, atoms);
1654 residue &structure::get_residue(
const std::string &asym_id,
int seqID,
const std::string &authSeqID)
1658 for (
auto &res : m_non_polymers)
1660 if (res.get_asym_id() == asym_id and (authSeqID.empty()
or res.get_auth_seq_id() == authSeqID))
1665 for (
auto &poly : m_polymers)
1667 if (poly.get_asym_id() != asym_id)
1670 for (
auto &res : poly)
1672 if (res.get_seq_id() == seqID)
1677 for (
auto &branch : m_branches)
1679 if (branch.get_asym_id() != asym_id)
1682 for (
auto &sugar : branch)
1684 if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID)
1689 std::string desc = asym_id;
1694 if (not authSeqID.empty())
1695 desc +=
"-" + authSeqID;
1697 throw std::out_of_range(
"Could not find residue " + desc);
1700 residue &structure::get_residue(
const std::string &asym_id,
const std::string &compID,
int seqID,
const std::string &authSeqID)
1704 for (
auto &res : m_non_polymers)
1706 if (res.get_asym_id() == asym_id and res.get_auth_seq_id() == authSeqID and res.get_compound_id() == compID)
1711 for (
auto &poly : m_polymers)
1713 if (poly.get_asym_id() != asym_id)
1716 for (
auto &res : poly)
1718 if (res.get_seq_id() == seqID and res.get_compound_id() == compID)
1723 for (
auto &branch : m_branches)
1725 if (branch.get_asym_id() != asym_id)
1728 for (
auto &sugar : branch)
1730 if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID and sugar.get_compound_id() == compID)
1735 std::string desc = asym_id;
1740 if (not authSeqID.empty())
1741 desc +=
"-" + authSeqID;
1743 throw std::out_of_range(
"Could not find residue " + desc +
" of type " + compID);
1746 branch &structure::get_branch_by_asym_id(
const std::string &asym_id)
1748 for (
auto &branch : m_branches)
1750 if (branch.get_asym_id() == asym_id)
1754 throw std::runtime_error(
"branch not found for asym id " + asym_id);
1757 std::string structure::insert_compound(
const std::string &compoundID,
bool is_entity)
1761 auto compound = compound_factory::instance().create(compoundID);
1762 if (compound ==
nullptr)
1763 throw std::runtime_error(
"Trying to insert unknown compound " + compoundID +
" (not found in CCD)");
1765 auto &chemComp = m_db[
"chem_comp"];
1766 auto r = chemComp.find(key(
"id") == compoundID);
1771 {
"name", compound->name()},
1772 {
"formula", compound->formula()},
1773 {
"formula_weight", compound->formula_weight()},
1774 {
"type", compound->type()}});
1777 std::string entity_id;
1781 auto &pdbxEntityNonpoly = m_db[
"pdbx_entity_nonpoly"];
1783 entity_id = pdbxEntityNonpoly.find_first<std::string>(
"comp_id"_key == compoundID,
"entity_id");
1785 if (entity_id.empty())
1787 auto &entity = m_db[
"entity"];
1788 entity_id = entity.get_unique_id(
"");
1792 {
"type",
"non-polymer"},
1793 {
"pdbx_description", compound->name()},
1794 {
"formula_weight", compound->formula_weight()}});
1796 pdbxEntityNonpoly.emplace({
1797 {
"entity_id", entity_id},
1798 {
"name", compound->name()},
1799 {
"comp_id", compoundID}});
1808 atom &structure::emplace_atom(atom &&atom)
1810 int L = 0, R =
static_cast<int>(m_atom_index.size() - 1);
1813 int i = (L + R) / 2;
1815 auto &ai = m_atoms[m_atom_index[
i]];
1817 int d = ai.id().compare(atom.id());
1820 throw std::runtime_error(
"Duplicate atom ID " + atom.id());
1829 m_atom_index.insert(m_atom_index.begin(), m_atoms.size());
1831 m_atom_index.insert(m_atom_index.begin() + R + 1, m_atoms.size());
1834 auto &atom_type = m_db[
"atom_type"];
1835 std::string symbol = atom.get_property(
"type_symbol");
1837 using namespace cif::literals;
1838 if (not atom_type.exists(
"symbol"_key == symbol))
1839 atom_type.emplace({ {
"symbol", symbol } });
1841 return m_atoms.emplace_back(std::move(atom));
1844 void structure::remove_atom(atom &
a,
bool removeFromResidue)
1848 auto &atomSites = m_db[
"atom_site"];
1850 if (removeFromResidue)
1854 auto &res = get_residue(
a);
1855 res.m_atoms.erase(std::remove(res.m_atoms.begin(), res.m_atoms.end(),
a), res.m_atoms.end());
1857 catch (
const std::exception &ex)
1860 std::cerr <<
"Error removing atom from residue: " << ex.what() << std::endl;
1864 atomSites.erase(
"id"_key ==
a.id());
1866 assert(m_atom_index.size() == m_atoms.size());
1869 bool removed =
false;
1872 int L = 0, R =
static_cast<int>(m_atom_index.size() - 1);
1875 int i = (L + R) / 2;
1877 const atom &atom = m_atoms[m_atom_index[
i]];
1879 int d = atom.id().compare(
a.id());
1883 m_atoms.erase(m_atoms.begin() + m_atom_index[
i]);
1885 auto ai = m_atom_index[
i];
1886 m_atom_index.erase(m_atom_index.begin() +
i);
1888 for (
auto &
j : m_atom_index)
1909 void structure::swap_atoms(atom a1, atom a2)
1911 auto &atomSites = m_db[
"atom_site"];
1915 auto r1 = atomSites.find1(key(
"id") == a1.id());
1916 auto r2 = atomSites.find1(key(
"id") == a2.id());
1918 auto l1 =
r1[
"label_atom_id"];
1919 auto l2 =
r2[
"label_atom_id"];
1922 auto l3 =
r1[
"auth_atom_id"];
1923 auto l4 =
r2[
"auth_atom_id"];
1926 catch (
const std::exception &ex)
1928 std::throw_with_nested(std::runtime_error(
"Failed to swap atoms"));
1932 void structure::move_atom(atom
a,
point p)
1937 void structure::change_residue(residue &res,
const std::string &newCompound,
1938 const std::vector<std::tuple<std::string, std::string>> &remappedAtoms)
1942 std::string asym_id = res.get_asym_id();
1944 const auto compound = compound_factory::instance().create(newCompound);
1946 throw std::runtime_error(
"Unknown compound " + newCompound);
1951 std::string entityID;
1952 if (res.is_entity())
1955 auto &entity = m_db[
"entity"];
1957 entityID = entity.find_first<std::string>(
"type"_key ==
"non-polymer" and
"pdbx_description"_key == compound->name(),
"id");
1959 if (entityID.empty())
1961 entityID = entity.get_unique_id(
"");
1962 entity.emplace({{
"id", entityID},
1963 {
"type",
"non-polymer"},
1964 {
"pdbx_description", compound->name()},
1965 {
"formula_weight", compound->formula_weight()}});
1967 auto &pdbxEntityNonpoly = m_db[
"pdbx_entity_nonpoly"];
1968 pdbxEntityNonpoly.emplace({{
"entity_id", entityID},
1969 {
"name", compound->name()},
1970 {
"comp_id", newCompound}});
1973 auto &pdbxNonPolyScheme = m_db[
"pdbx_nonpoly_scheme"];
1974 for (
auto nps : pdbxNonPolyScheme.find(
"asym_id"_key == asym_id))
1976 nps.assign(
"mon_id", newCompound,
true);
1977 nps.assign(
"pdb_mon_id", newCompound,
true);
1978 nps.assign(
"auth_mon_id", newCompound,
true);
1979 nps.assign(
"entity_id", entityID,
true);
1983 auto &chemComp = m_db[
"chem_comp"];
1984 if (not chemComp.exists(key(
"id") == newCompound))
1986 chemComp.emplace({{
"id", newCompound},
1987 {
"name", compound->name()},
1988 {
"formula", compound->formula()},
1989 {
"formula_weight", compound->formula_weight()},
1990 {
"type", compound->type()}});
1994 m_db[
"struct_asym"].update_value(
"id"_key == asym_id,
"entity_id", entityID);
1997 insert_compound(newCompound,
false);
1999 res.set_compound_id(newCompound);
2001 auto &atomSites = m_db[
"atom_site"];
2002 auto atoms = res.atoms();
2004 for (
const auto &[a1, a2] : remappedAtoms)
2006 auto i = find_if(atoms.begin(), atoms.end(), [
id = a1](
const atom &
a)
2007 {
return a.get_label_atom_id() == id; });
2008 if (
i == atoms.end())
2011 std::cerr <<
"Missing atom for atom ID " << a1 << std::endl;
2015 auto r = atomSites.find(key(
"id") ==
i->id());
2020 if (a2.empty()
or a2 ==
".")
2024 auto ra = r.front();
2025 ra[
"label_atom_id"] = a2;
2026 ra[
"auth_atom_id"] = a2;
2027 ra[
"type_symbol"] = atom_type_traits(compound->get_atom_by_atom_id(a2).type_symbol).symbol();
2031 for (
auto a : atoms)
2033 atomSites.update_value(key(
"id") ==
a.id(),
"label_comp_id", newCompound);
2034 atomSites.update_value(key(
"id") ==
a.id(),
"auth_comp_id", newCompound);
2038 void structure::remove_residue(
const std::string &asym_id,
int seq_id,
const std::string &auth_seq_id)
2042 for (
auto &res : m_non_polymers)
2044 if (res.get_asym_id() == asym_id and (auth_seq_id.empty()
or res.get_auth_seq_id() == auth_seq_id))
2046 remove_residue(res);
2052 for (
auto &poly : m_polymers)
2054 if (poly.get_asym_id() != asym_id)
2057 for (
auto &res : poly)
2059 if (res.get_seq_id() == seq_id)
2061 remove_residue(res);
2067 for (
auto &branch : m_branches)
2069 if (branch.get_asym_id() != asym_id)
2072 for (
auto &sugar : branch)
2074 if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == auth_seq_id)
2076 remove_residue(sugar);
2083 void structure::remove_residue(residue &res)
2087 auto atoms = res.atoms();
2089 switch (res.entity_type())
2091 case EntityType::polymer:
2093 auto &
m =
dynamic_cast<monomer &
>(res);
2095 m_db[
"pdbx_poly_seq_scheme"].erase(
2096 "asym_id"_key == res.get_asym_id() and
2097 "seq_id"_key == res.get_seq_id());
2099 for (
auto &poly : m_polymers)
2100 poly.erase(std::remove(poly.begin(), poly.end(),
m), poly.end());
2104 case EntityType::NonPolymer:
2105 m_db[
"pdbx_nonpoly_scheme"].erase(
"asym_id"_key == res.get_asym_id());
2106 m_db[
"struct_asym"].erase(
"id"_key == res.get_asym_id());
2107 m_non_polymers.erase(std::remove(m_non_polymers.begin(), m_non_polymers.end(), res), m_non_polymers.end());
2110 case EntityType::Water:
2111 m_db[
"pdbx_nonpoly_scheme"].erase(
"asym_id"_key == res.get_asym_id());
2112 m_non_polymers.erase(std::remove(m_non_polymers.begin(), m_non_polymers.end(), res), m_non_polymers.end());
2115 case EntityType::Branched:
2117 auto &s =
dynamic_cast<sugar&
>(res);
2125 case EntityType::Macrolide:
2127 throw std::runtime_error(
"no support for macrolides yet");
2130 for (
auto atom : atoms)
2131 remove_atom(atom,
false);
2134 void structure::remove_sugar(sugar &s)
2138 std::string asym_id = s.get_asym_id();
2139 branch &branch = get_branch_by_asym_id(asym_id);
2140 auto si =
std::find(branch.begin(), branch.end(), s);
2141 if (si == branch.end())
2142 throw std::runtime_error(
"sugar not part of branch");
2143 size_t six = si - branch.begin();
2146 remove_branch(branch);
2149 std::set<size_t> dix;
2150 std::stack<size_t> test;
2153 while (not test.empty())
2155 auto tix = test.top();
2163 for (
auto &s2 : branch)
2165 if (s2.get_link_nr() == tix)
2166 test.push(s2.num());
2169 for (
auto atom : branch[tix - 1].atoms())
2170 remove_atom(atom,
false);
2173 branch.erase(remove_if(branch.begin(), branch.end(), [dix](
const sugar &s) {
return dix.count(s.num()); }), branch.end());
2175 auto entity_id = create_entity_for_branch(branch);
2178 auto &struct_asym = m_db[
"struct_asym"];
2179 auto r = struct_asym.find1(
"id"_key == asym_id);
2180 r[
"entity_id"] = entity_id;
2182 for (
auto &sugar : branch)
2184 for (
auto atom : sugar.atoms())
2185 atom.set_property(
"label_entity_id", entity_id);
2188 auto &pdbx_branch_scheme = m_db[
"pdbx_branch_scheme"];
2189 pdbx_branch_scheme.erase(
"asym_id"_key == asym_id);
2191 for (
auto &sugar : branch)
2193 pdbx_branch_scheme.emplace({
2194 {
"asym_id", asym_id},
2195 {
"entity_id", entity_id},
2196 {
"num", sugar.num()},
2197 {
"mon_id", sugar.get_compound_id()},
2199 {
"pdb_asym_id", asym_id},
2200 {
"pdb_seq_num", sugar.num()},
2201 {
"pdb_mon_id", sugar.get_compound_id()},
2204 {
"auth_asym_id", asym_id},
2205 {
"auth_mon_id", sugar.get_compound_id()},
2206 {
"auth_seq_num", sugar.get_auth_seq_id()},
2214 void structure::remove_branch(branch &branch)
2218 for (
auto &sugar : branch)
2220 auto atoms = sugar.atoms();
2221 for (
auto atom : atoms)
2225 m_db[
"pdbx_branch_scheme"].erase(
"asym_id"_key == branch.get_asym_id());
2226 m_db[
"struct_asym"].erase(
"id"_key == branch.get_asym_id());
2228 m_branches.erase(
remove(m_branches.begin(), m_branches.end(), branch), m_branches.end());
2231 std::string structure::create_non_poly_entity(
const std::string &comp_id)
2233 return insert_compound(comp_id,
true);
2236 std::string structure::create_non_poly(
const std::string &entity_id,
const std::vector<atom> &atoms)
2238 using namespace cif::literals;
2240 auto &struct_asym = m_db[
"struct_asym"];
2241 std::string asym_id = struct_asym.get_unique_id();
2243 struct_asym.emplace({
2245 {
"pdbx_blank_PDB_chainid_flag",
"N"},
2246 {
"pdbx_modified",
"N"},
2247 {
"entity_id", entity_id},
2251 std::string comp_id = m_db[
"pdbx_entity_nonpoly"].find1<std::string>(
"entity_id"_key == entity_id,
"comp_id");
2253 auto &atom_site = m_db[
"atom_site"];
2255 auto &res = m_non_polymers.emplace_back(*
this, comp_id, asym_id, 0, asym_id,
"1",
"");
2257 for (
auto &atom : atoms)
2259 auto atom_id = atom_site.get_unique_id(
"");
2261 auto row = atom_site.emplace({
2262 {
"group_PDB", atom.get_property(
"group_PDB")},
2264 {
"type_symbol", atom.get_property(
"type_symbol")},
2265 {
"label_atom_id", atom.get_property(
"label_atom_id")},
2266 {
"label_alt_id", atom.get_property(
"label_alt_id")},
2267 {
"label_comp_id", comp_id},
2268 {
"label_asym_id", asym_id},
2269 {
"label_entity_id", entity_id},
2270 {
"label_seq_id",
"."},
2271 {
"pdbx_PDB_ins_code",
""},
2272 {
"Cartn_x", atom.get_property(
"Cartn_x")},
2273 {
"Cartn_y", atom.get_property(
"Cartn_y")},
2274 {
"Cartn_z", atom.get_property(
"Cartn_z")},
2275 {
"occupancy", atom.get_property(
"occupancy")},
2276 {
"B_iso_or_equiv", atom.get_property(
"B_iso_or_equiv")},
2277 {
"pdbx_formal_charge", atom.get_property(
"pdbx_formal_charge")},
2279 {
"auth_comp_id", comp_id},
2280 {
"auth_asym_id", asym_id},
2281 {
"auth_atom_id", atom.get_property(
"label_atom_id")},
2282 {
"pdbx_PDB_model_num", 1}
2285 auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
2286 res.add_atom(newAtom);
2289 auto &pdbx_nonpoly_scheme = m_db[
"pdbx_nonpoly_scheme"];
2290 size_t ndb_nr = pdbx_nonpoly_scheme.find(
"asym_id"_key == asym_id and
"entity_id"_key == entity_id).size() + 1;
2291 pdbx_nonpoly_scheme.emplace({
2292 {
"asym_id", asym_id},
2293 {
"entity_id", entity_id},
2294 {
"mon_id", comp_id},
2295 {
"ndb_seq_num", ndb_nr},
2296 {
"pdb_seq_num", res.get_auth_seq_id()},
2297 {
"auth_seq_num", res.get_auth_seq_id()},
2298 {
"pdb_mon_id", comp_id},
2299 {
"auth_mon_id", comp_id},
2300 {
"pdb_strand_id", asym_id},
2301 {
"pdb_ins_code",
"."},
2307 std::string structure::create_non_poly(
const std::string &entity_id, std::vector<row_initializer> atoms)
2311 auto &struct_asym = m_db[
"struct_asym"];
2312 std::string asym_id = struct_asym.get_unique_id();
2314 struct_asym.emplace({
2316 {
"pdbx_blank_PDB_chainid_flag",
"N"},
2317 {
"pdbx_modified",
"N"},
2318 {
"entity_id", entity_id},
2322 std::string comp_id = m_db[
"pdbx_entity_nonpoly"].find1<std::string>(
"entity_id"_key == entity_id,
"comp_id");
2324 auto &atom_site = m_db[
"atom_site"];
2326 auto &res = m_non_polymers.emplace_back(*
this, comp_id, asym_id, 0, asym_id,
"1",
"");
2328 for (
auto &atom : atoms)
2330 auto atom_id = atom_site.get_unique_id(
"");
2332 atom.set_value(
"name", atom_id);
2333 atom.set_value(
"label_asym_id", asym_id);
2334 atom.set_value(
"auth_asym_id", asym_id);
2335 atom.set_value(
"label_entity_id", entity_id);
2337 atom.set_value_if_empty({
"group_PDB",
"HETATM"});
2338 atom.set_value_if_empty({
"label_comp_id", comp_id});
2339 atom.set_value_if_empty({
"label_seq_id",
""});
2340 atom.set_value_if_empty({
"auth_comp_id", comp_id});
2341 atom.set_value_if_empty({
"auth_seq_id", 1});
2342 atom.set_value_if_empty({
"pdbx_PDB_model_num", 1});
2343 atom.set_value_if_empty({
"label_alt_id",
""});
2345 auto row = atom_site.emplace(atom.begin(), atom.end());
2347 auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
2348 res.add_atom(newAtom);
2351 auto &pdbx_nonpoly_scheme = m_db[
"pdbx_nonpoly_scheme"];
2352 size_t ndb_nr = pdbx_nonpoly_scheme.find(
"asym_id"_key == asym_id and
"entity_id"_key == entity_id).size() + 1;
2353 pdbx_nonpoly_scheme.emplace({
2354 {
"asym_id", asym_id},
2355 {
"entity_id", entity_id},
2356 {
"mon_id", comp_id},
2357 {
"ndb_seq_num", ndb_nr},
2358 {
"pdb_seq_num", res.get_auth_seq_id()},
2359 {
"auth_seq_num", res.get_auth_seq_id()},
2360 {
"pdb_mon_id", comp_id},
2361 {
"auth_mon_id", comp_id},
2362 {
"pdb_strand_id", asym_id},
2363 {
"pdb_ins_code",
"."},
2369 branch &structure::create_branch()
2371 auto &entity = m_db[
"entity"];
2372 auto &struct_asym = m_db[
"struct_asym"];
2374 auto entity_id = entity.get_unique_id(
"");
2375 auto asym_id = struct_asym.get_unique_id();
2379 {
"type",
"branched"}
2382 struct_asym.emplace({
2384 {
"pdbx_blank_PDB_chainid_flag",
"N"},
2385 {
"pdbx_modified",
"N"},
2386 {
"entity_id", entity_id},
2390 return m_branches.emplace_back(*
this, asym_id, entity_id);
2568 std::string structure::create_entity_for_branch(branch &branch)
2572 std::string entityName = branch.name();
2574 auto &entity = m_db[
"entity"];
2576 std::string entityID = entity.find_first<std::string>(
"type"_key ==
"branched" and
"pdbx_description"_key == entityName,
"id");
2578 if (entityID.empty())
2580 entityID = entity.get_unique_id(
"");
2583 std::cout <<
"Creating new entity " << entityID <<
" for branched sugar " << entityName << std::endl;
2587 {
"type",
"branched"},
2588 {
"src_method",
"man"},
2589 {
"pdbx_description", entityName},
2590 {
"formula_weight", branch.weight()}});
2592 auto &pdbx_entity_branch_list = m_db[
"pdbx_entity_branch_list"];
2593 for (
auto &sugar : branch)
2595 pdbx_entity_branch_list.emplace({
2596 {
"entity_id", entityID},
2597 {
"comp_id", sugar.get_compound_id()},
2598 {
"num", sugar.num()},
2603 auto &pdbx_entity_branch_link = m_db[
"pdbx_entity_branch_link"];
2604 for (
auto &s1 : branch)
2606 auto l2 = s1.get_link();
2611 auto &s2 = branch.at(std::stoi(l2.get_auth_seq_id()) - 1);
2612 auto l1 = s2.get_atom_by_atom_id(
"C1");
2614 pdbx_entity_branch_link.emplace({
2615 {
"link_id", pdbx_entity_branch_link.get_unique_id(
"")},
2616 {
"entity_id", entityID},
2617 {
"entity_branch_list_num_1", s1.get_auth_seq_id()},
2618 {
"comp_id_1", s1.get_compound_id()},
2619 {
"atom_id_1", l1.get_label_atom_id()},
2620 {
"leaving_atom_id_1",
"O1"},
2621 {
"entity_branch_list_num_2", s2.get_auth_seq_id()},
2622 {
"comp_id_2", s2.get_compound_id()},
2623 {
"atom_id_2", l2.get_label_atom_id()},
2624 {
"leaving_atom_id_2",
"H" + l2.get_label_atom_id()},
2625 {
"value_order",
"sing"}
2633 void structure::cleanup_empty_categories()
2637 auto &atomSite = m_db[
"atom_site"];
2640 auto &chem_comp = m_db[
"chem_comp"];
2641 std::vector<row_handle> obsoleteChemComps;
2643 for (
auto chemComp : chem_comp)
2645 std::string compID = chemComp[
"id"].as<std::string>();
2646 if (atomSite.exists(
"label_comp_id"_key == compID
or "auth_comp_id"_key == compID))
2649 obsoleteChemComps.push_back(chemComp);
2652 for (
auto chemComp : obsoleteChemComps)
2653 chem_comp.erase(chemComp);
2657 auto &entities = m_db[
"entity"];
2658 std::vector<row_handle> obsoleteEntities;
2660 for (
auto entity : entities)
2662 std::string entityID = entity[
"id"].as<std::string>();
2663 if (atomSite.exists(
"label_entity_id"_key == entityID))
2666 obsoleteEntities.push_back(entity);
2669 for (
auto entity : obsoleteEntities)
2670 entities.erase(entity);
2674 for (
const char *cat : {
"pdbx_entity_nonpoly"})
2676 auto &category = m_db[cat];
2678 std::vector<row_handle> empty;
2679 for (
auto row : category)
2681 if (not category.has_children(row) and not category.has_parents(row))
2682 empty.push_back(row);
2685 for (
auto row : empty)
2686 category.erase(row);
2690 for (
auto entity : entities)
2692 std::string
type, id;
2693 cif::tie(type,
id) = entity.get(
"type",
"id");
2695 std::optional<size_t> count;
2696 if (type ==
"polymer")
2697 count = m_db[
"struct_asym"].find(
"entity_id"_key ==
id).size();
2698 else if (type ==
"non-polymer" or type ==
"water")
2699 count = m_db[
"pdbx_nonpoly_scheme"].find(
"entity_id"_key ==
id).size();
2700 else if (type ==
"branched")
2703 std::set<std::string> asym_ids;
2704 for (
const auto &asym_id : m_db[
"pdbx_branch_scheme"].find<std::string>(
"entity_id"_key ==
id,
"asym_id"))
2705 asym_ids.insert(asym_id);
2706 count = asym_ids.size();
2709 entity[
"pdbx_number_of_molecules"] = count.value_or(0);
2715 for (
auto &
a : m_atoms)
2721 for (
auto &
a : m_atoms)
2725 void structure::translate_and_rotate(
point t, quaternion q)
2727 for (
auto &
a : m_atoms)
2728 a.translate_and_rotate(t, q);
2731 void structure::translate_rotate_and_translate(
point t1, quaternion q,
point t2)
2733 for (
auto &
a : m_atoms)
2734 a.translate_rotate_and_translate(t1, q, t2);
2737 void structure::validate_atoms()
const 2740 assert(m_atoms.size() == m_atom_index.size());
2741 for (
size_t i = 0;
i +
i < m_atoms.size(); ++
i)
2742 assert(m_atoms[m_atom_index[
i]].
id().compare(m_atoms[m_atom_index[
i + 1]].
id()) < 0);
2744 std::vector<atom> atoms = m_atoms;
2746 auto removeAtomFromList = [&atoms](
const atom &
a)
2748 auto i =
std::find(atoms.begin(), atoms.end(),
a);
2749 assert(
i != atoms.end());
2753 for (
auto &poly : m_polymers)
2755 for (
auto &monomer : poly)
2757 for (
auto &atom : monomer.atoms())
2758 removeAtomFromList(atom);
2762 for (
auto &branch : m_branches)
2764 for (
auto &sugar : branch)
2766 for (
auto &atom : sugar.atoms())
2767 removeAtomFromList(atom);
2771 for (
auto &res : m_non_polymers)
2773 for (
auto &atom : res.atoms())
2774 removeAtomFromList(atom);
2777 assert(atoms.empty());
2784 if (db.get(
"atom_site") ==
nullptr)
2785 throw std::runtime_error(
"Cannot reconstruct PDBx file, atom data missing");
point centroid(const std::vector< point > &pts)
void sqrt(Image< double > &op)
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
void reconstruct_pdbx(datablock &db)
void abs(Image< double > &op)
bool iequals(std::string_view a, std::string_view b)
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
#define rotate(a, i, j, k, l)
const std::map< std::string, std::vector< std::string > > kChiAtomsMap
void max(Image< double > &op1, const Image< double > &op2)
TYPE distance(struct Point_T *p, struct Point_T *q)
void translate(PDB *pdb_original, PDB *pdb_move, unsigned num_atoms, double x0, double y0, double z0)
double psi(const double x)
std::ostream & operator<<(std::ostream &os, const atom &atom)
std::string to_string(bond_type bondType)