Xmipp  v3.23.11-Nereus
model.cpp
Go to the documentation of this file.
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "cif++.hpp"
28 
29 #include <filesystem>
30 #include <fstream>
31 #include <iomanip>
32 #include <numeric>
33 #include <stack>
34 
35 namespace fs = std::filesystem;
36 
37 namespace cif::mm
38 {
39 
40 // --------------------------------------------------------------------
41 // atom
42 
43 void atom::atom_impl::moveTo(const point &p)
44 {
45  if (m_symop != "1_555")
46  throw std::runtime_error("Moving symmetry copy");
47 
48  auto r = row();
49 
50 #if __cpp_lib_format
51  r.assign("Cartn_x", std::format("{:.3f}", p.m_x), false, false);
52  r.assign("Cartn_y", std::format("{:.3f}", p.m_y), false, false);
53  r.assign("Cartn_z", std::format("{:.3f}", p.m_z), false, false);
54 #else
55  r.assign("Cartn_x", cif::format("%.3f", p.m_x).str(), false, false);
56  r.assign("Cartn_y", cif::format("%.3f", p.m_y).str(), false, false);
57  r.assign("Cartn_z", cif::format("%.3f", p.m_z).str(), false, false);
58 #endif
59  m_location = p;
60 }
61 
62 // const compound *compound() const;
63 
64 std::string atom::atom_impl::get_property(std::string_view name) const
65 {
66  return row()[name].as<std::string>();
67 }
68 
69 int atom::atom_impl::get_property_int(std::string_view name) const
70 {
71  int result = 0;
72  if (not row()[name].empty())
73  {
74  auto s = get_property(name);
75 
76  std::from_chars_result r = std::from_chars(s.data(), s.data() + s.length(), result);
77  if (r.ec != std::errc() and VERBOSE > 0)
78  std::cerr << "Error converting " << s << " to number for property " << name << std::endl;
79  }
80  return result;
81 }
82 
83 float atom::atom_impl::get_property_float(std::string_view name) const
84 {
85  float result = 0;
86  if (not row()[name].empty())
87  {
88  auto s = get_property(name);
89 
90  std::from_chars_result r = cif::from_chars(s.data(), s.data() + s.length(), result);
91  if (r.ec != std::errc() and VERBOSE > 0)
92  std::cerr << "Error converting " << s << " to number for property " << name << std::endl;
93  }
94  return result;
95 }
96 
97 void atom::atom_impl::set_property(const std::string_view name, const std::string &value)
98 {
99  auto r = row();
100  if (not r)
101  throw std::runtime_error("Trying to modify a row that does not exist");
102  r.assign(name, value, true, true);
103 }
104 
105 // int atom::atom_impl::compare(const atom_impl &b) const
106 // {
107 // int d = m_asym_id.compare(b.m_asym_id);
108 // if (d == 0)
109 // d = m_seq_id - b.m_seq_id;
110 // if (d == 0)
111 // d = m_auth_seq_id.compare(b.m_auth_seq_id);
112 // if (d == 0)
113 // d = mAtom_id.compare(b.mAtom_id);
114 
115 // return d;
116 // }
117 
118 // bool atom::atom_impl::getAnisoU(float anisou[6]) const
119 // {
120 // bool result = false;
121 
122 // auto cat = m_db.get("atom_site_anisotrop");
123 // if (cat)
124 // {
125 // for (auto r : cat->find(key("id") == m_id))
126 // {
127 // tie(anisou[0], anisou[1], anisou[2], anisou[3], anisou[4], anisou[5]) =
128 // r.get("U[1][1]", "U[1][2]", "U[1][3]", "U[2][2]", "U[2][3]", "U[3][3]");
129 // result = true;
130 // break;
131 // }
132 // }
133 
134 // return result;
135 // }
136 
137 int atom::atom_impl::get_charge() const
138 {
139  auto formalCharge = row()["pdbx_formal_charge"].as<std::optional<int>>();
140 
141  if (not formalCharge.has_value())
142  {
143  auto c = cif::compound_factory::instance().create(get_property("label_comp_id"));
144 
145  if (c != nullptr and c->atoms().size() == 1)
146  formalCharge = c->atoms().front().charge;
147  }
148 
149  return formalCharge.value_or(0);
150 }
151 
152 // const Compound *atom::atom_impl::compound() const
153 // {
154 // if (mCompound == nullptr)
155 // {
156 // std::string compID = get_property("label_comp_id");
157 
158 // mCompound = compound_factory::instance().create(compID);
159 // }
160 
161 // return mCompound;
162 // }
163 
164 // const std::string atom::atom_impl::get_property(const std::string_view name) const
165 // {
166 // for (auto &&[tag, ref] : mCachedRefs)
167 // {
168 // if (tag == name)
169 // return ref.as<std::string>();
170 // }
171 
172 // mCachedRefs.emplace_back(name, const_cast<Row &>(mRow)[name]);
173 // return std::get<1>(mCachedRefs.back()).as<std::string>();
174 // }
175 
176 // void atom::atom_impl::set_property(const std::string_view name, const std::string &value)
177 // {
178 // for (auto &&[tag, ref] : mCachedRefs)
179 // {
180 // if (tag != name)
181 // continue;
182 
183 // ref = value;
184 // return;
185 // }
186 
187 // mCachedRefs.emplace_back(name, mRow[name]);
188 // std::get<1>(mCachedRefs.back()) = value;
189 // }
190 
191 // const Row atom::getRowAniso() const
192 // {
193 // auto &db = m_impl->m_db;
194 // auto cat = db.get("atom_site_anisotrop");
195 // if (not cat)
196 // return {};
197 // else
198 // return cat->find1(key("id") == m_impl->m_id);
199 // }
200 
201 // float atom::uIso() const
202 // {
203 // float result;
204 
205 // if (not get_property<std::string>("U_iso_or_equiv").empty())
206 // result = get_property<float>("U_iso_or_equiv");
207 // else if (not get_property<std::string>("B_iso_or_equiv").empty())
208 // result = get_property<float>("B_iso_or_equiv") / static_cast<float>(8 * kPI * kPI);
209 // else
210 // throw std::runtime_error("Missing B_iso or U_iso");
211 
212 // return result;
213 // }
214 
215 // const Compound &atom::compound() const
216 // {
217 // auto result = impl().compound();
218 
219 // if (result == nullptr)
220 // {
221 // if (VERBOSE > 0)
222 // std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << std::endl;
223 
224 // throw std::runtime_error("no compound");
225 // }
226 
227 // return *result;
228 // }
229 
230 // std::string atom::labelEntityID() const
231 // {
232 // return get_property<std::string>("label_entity_id");
233 // }
234 
235 // std::string atom::authAtom_id() const
236 // {
237 // return get_property<std::string>("auth_atom_id");
238 // }
239 
240 // std::string atom::authCompID() const
241 // {
242 // return get_property<std::string>("auth_comp_id");
243 // }
244 
245 // std::string atom::get_auth_asym_id() const
246 // {
247 // return get_property<std::string>("auth_asym_id");
248 // }
249 
250 // std::string atom::get_pdb_ins_code() const
251 // {
252 // return get_property<std::string>("pdbx_PDB_ins_code");
253 // }
254 
255 // std::string atom::pdbxAuthAltID() const
256 // {
257 // return get_property<std::string>("pdbx_auth_alt_id");
258 // }
259 
260 // void atom::translate(point t)
261 // {
262 // auto loc = location();
263 // loc += t;
264 // location(loc);
265 // }
266 
267 // void atom::rotate(quaternion q)
268 // {
269 // auto loc = location();
270 // loc.rotate(q);
271 // location(loc);
272 // }
273 
274 // void atom::translate_and_rotate(point t, quaternion q)
275 // {
276 // auto loc = location();
277 // loc += t;
278 // loc.rotate(q);
279 // location(loc);
280 // }
281 
282 // void atom::translate_rotate_and_translate(point t1, quaternion q, point t2)
283 // {
284 // auto loc = location();
285 // loc += t1;
286 // loc.rotate(q);
287 // loc += t2;
288 // location(loc);
289 // }
290 
291 std::ostream &operator<<(std::ostream &os, const atom &atom)
292 {
293  os << atom.get_label_comp_id() << ' ' << atom.get_label_asym_id() << ':' << atom.get_label_seq_id() << ' ' << atom.get_label_atom_id();
294 
295  if (atom.is_alternate())
296  os << '(' << atom.get_label_alt_id() << ')';
297  if (atom.get_auth_asym_id() != atom.get_label_asym_id() or atom.get_auth_seq_id() != std::to_string(atom.get_label_seq_id()) or atom.get_pdb_ins_code().empty() == false)
298  os << " [" << atom.get_auth_asym_id() << ':' << atom.get_auth_seq_id() << atom.get_pdb_ins_code() << ']';
299 
300  return os;
301 }
302 
303 // --------------------------------------------------------------------
304 // residue
305 
306 residue::residue(structure &structure, const std::vector<atom> &atoms)
307  : m_structure(&structure)
308 {
309  if (atoms.empty())
310  throw std::runtime_error("Empty list of atoms");
311 
312  auto &a = atoms.front();
313 
314  m_compound_id = a.get_label_comp_id();
315  m_asym_id = a.get_label_asym_id();
316  m_seq_id = a.get_label_seq_id();
317  m_auth_asym_id = a.get_auth_asym_id();
318  m_auth_seq_id = a.get_auth_seq_id();
319  m_pdb_ins_code = a.get_pdb_ins_code();
320 
321  for (auto atom : atoms)
322  m_atoms.push_back(atom);
323 }
324 
325 // residue::residue(residue &&rhs)
326 // : m_structure(rhs.m_structure)
327 // , m_compound_id(std::move(rhs.m_compound_id))
328 // , m_asym_id(std::move(rhs.m_asym_id))
329 // , m_seq_id(rhs.m_seq_id)
330 // , m_auth_seq_id(rhs.m_auth_seq_id)
331 // , m_atoms(std::move(rhs.m_atoms))
332 // {
333 // // std::cerr << "move constructor residue" << std::endl;
334 // rhs.m_structure = nullptr;
335 // }
336 
337 // residue &residue::operator=(residue &&rhs)
338 // {
339 // // std::cerr << "move assignment residue" << std::endl;
340 // m_structure = rhs.m_structure;
341 // rhs.m_structure = nullptr;
342 // m_compound_id = std::move(rhs.m_compound_id);
343 // m_asym_id = std::move(rhs.m_asym_id);
344 // m_seq_id = rhs.m_seq_id;
345 // m_auth_seq_id = rhs.m_auth_seq_id;
346 // m_atoms = std::move(rhs.m_atoms);
347 
348 // return *this;
349 // }
350 
351 // residue::~residue()
352 // {
353 // // std::cerr << "~residue" << std::endl;
354 // }
355 
356 std::string residue::get_entity_id() const
357 {
358  std::string result;
359 
360  if (not m_atoms.empty())
361  result = m_atoms.front().get_label_entity_id();
362  else if (m_structure != nullptr and not m_asym_id.empty())
363  {
364  using namespace literals;
365 
366  auto &db = m_structure->get_datablock();
367  result = db["struct_asym"].find1<std::string>("id"_key == m_asym_id, "entity_id");
368  }
369 
370  return result;
371 }
372 
373 EntityType residue::entity_type() const
374 {
375  assert(m_structure);
376  return m_structure->get_entity_type_for_entity_id(get_entity_id());
377 }
378 
379 // std::string residue::authInsCode() const
380 // {
381 // assert(m_structure);
382 
383 // std::string result;
384 // if (not m_atoms.empty())
385 // result = m_atoms.front().get_property("pdbx_PDB_ins_code");
386 
387 // return result;
388 // }
389 
390 // std::string residue::get_auth_asym_id() const
391 // {
392 // assert(m_structure);
393 
394 // std::string result;
395 // if (not m_atoms.empty())
396 // result = m_atoms.front().get_property("auth_asym_id");
397 
398 // return result;
399 // }
400 
401 // std::string residue::authSeqID() const
402 // {
403 // return m_auth_seq_id;
404 // }
405 
406 // const Compound &residue::compound() const
407 // {
408 // auto result = compound_factory::instance().create(m_compound_id);
409 // if (result == nullptr)
410 // throw std::runtime_error("Failed to create compound " + m_compound_id);
411 // return *result;
412 // }
413 
414 // std::string residue::unique_alt_id() const
415 // {
416 // if (m_structure == nullptr)
417 // throw std::runtime_error("Invalid residue object");
418 
419 // auto firstAlt = std::find_if(m_atoms.begin(), m_atoms.end(), [](auto &a)
420 // { return not a.get_label_alt_id().empty(); });
421 
422 // return firstAlt != m_atoms.end() ? firstAlt->get_label_alt_id() : "";
423 // }
424 
425 void residue::add_atom(atom &atom)
426 {
427  // atom.set_property("label_comp_id", m_compound_id);
428  // atom.set_property("label_asym_id", m_asym_id);
429  // if (m_seq_id != 0)
430  // atom.set_property("label_seq_id", std::to_string(m_seq_id));
431  // atom.set_property("auth_seq_id", m_auth_seq_id);
432 
433  m_atoms.push_back(atom);
434 }
435 
436 std::vector<atom> residue::unique_atoms() const
437 {
438  // if (m_structure == nullptr)
439  // throw std::runtime_error("Invalid residue object");
440 
441  std::vector<atom> result;
442  std::string firstAlt;
443 
444  for (auto &atom : m_atoms)
445  {
446  auto alt = atom.get_label_alt_id();
447  if (alt.empty())
448  {
449  result.push_back(atom);
450  continue;
451  }
452 
453  if (firstAlt.empty())
454  firstAlt = alt;
455  else if (alt != firstAlt)
456  {
457  if (VERBOSE > 0)
458  std::cerr << "skipping alternate atom " << atom << std::endl;
459  continue;
460  }
461 
462  result.push_back(atom);
463  }
464 
465  return result;
466 }
467 
468 std::set<std::string> residue::get_alternate_ids() const
469 {
470  std::set<std::string> result;
471 
472  for (auto a : m_atoms)
473  {
474  auto alt = a.get_label_alt_id();
475  if (not alt.empty())
476  result.insert(alt);
477  }
478 
479  return result;
480 }
481 
482 atom residue::get_atom_by_atom_id(const std::string &atom_id) const
483 {
484  atom result;
485 
486  for (auto &a : m_atoms)
487  {
488  if (a.get_label_atom_id() == atom_id)
489  {
490  result = a;
491  break;
492  }
493  }
494 
495  if (not result and VERBOSE > 1)
496  std::cerr << "atom with atom_id " << atom_id << " not found in residue " << m_asym_id << ':' << m_seq_id << std::endl;
497 
498  return result;
499 }
500 
501 // residue is a single entity if the atoms for the asym with m_asym_id is equal
502 // to the number of atoms in this residue... hope this is correct....
503 bool residue::is_entity() const
504 {
505  auto &db = m_structure->get_datablock();
506 
507  auto a1 = db["atom_site"].find(key("label_asym_id") == m_asym_id);
508  // auto a2 = atoms();
509  auto &a2 = m_atoms;
510 
511  return a1.size() == a2.size();
512 }
513 
514 std::tuple<point, float> residue::center_and_radius() const
515 {
516  std::vector<point> pts;
517  for (auto &a : m_atoms)
518  pts.push_back(a.get_location());
519 
520  auto center = centroid(pts);
521  float radius = 0;
522 
523  for (auto &pt : pts)
524  {
525  float d = static_cast<float>(distance(pt, center));
526  if (radius < d)
527  radius = d;
528  }
529 
530  return std::make_tuple(center, radius);
531 }
532 
533 bool residue::has_alternate_atoms() const
534 {
535  return std::find_if(m_atoms.begin(), m_atoms.end(), [](const atom &atom)
536  { return atom.is_alternate(); }) != m_atoms.end();
537 }
538 
539 std::set<std::string> residue::get_atom_ids() const
540 {
541  std::set<std::string> ids;
542  for (auto a : m_atoms)
543  ids.insert(a.get_label_atom_id());
544 
545  return ids;
546 }
547 
548 std::vector<atom> residue::get_atoms_by_id(const std::string &atom_id) const
549 {
550  std::vector<atom> atoms;
551  for (auto a : m_atoms)
552  {
553  if (a.get_label_atom_id() == atom_id)
554  atoms.push_back(a);
555  }
556  return atoms;
557 }
558 
559 std::ostream &operator<<(std::ostream &os, const residue &res)
560 {
561  os << res.get_compound_id() << ' ' << res.get_asym_id() << ':' << res.get_seq_id();
562 
563  if (res.get_auth_asym_id() != res.get_asym_id() or res.get_auth_seq_id() != std::to_string(res.get_seq_id()))
564  os << " [" << res.get_auth_asym_id() << ':' << res.get_auth_seq_id() << ']';
565 
566  return os;
567 }
568 
569 // --------------------------------------------------------------------
570 // monomer
571 
572 monomer::monomer(const polymer &polymer, size_t index, int seqID, const std::string &authSeqID, const std::string &pdbInsCode, const std::string &compoundID)
573  : residue(*polymer.get_structure(), compoundID, polymer.get_asym_id(), seqID, polymer.get_auth_asym_id(), authSeqID, pdbInsCode)
574  , m_polymer(&polymer)
575  , m_index(index)
576 {
577 }
578 
579 monomer::monomer(monomer &&rhs)
580  : residue(std::move(rhs))
581  , m_polymer(rhs.m_polymer)
582  , m_index(rhs.m_index)
583 {
584  rhs.m_polymer = nullptr;
585 }
586 
587 monomer &monomer::operator=(monomer &&rhs)
588 {
589  residue::operator=(std::move(rhs));
590  m_polymer = rhs.m_polymer;
591  rhs.m_polymer = nullptr;
592  m_index = rhs.m_index;
593 
594  return *this;
595 }
596 
597 bool monomer::is_first_in_chain() const
598 {
599  return m_index == 0;
600 }
601 
602 bool monomer::is_last_in_chain() const
603 {
604  return m_index + 1 == m_polymer->size();
605 }
606 
607 bool monomer::has_alpha() const
608 {
609  return m_index >= 1 and m_index + 2 < m_polymer->size();
610 }
611 
612 bool monomer::has_kappa() const
613 {
614  return m_index >= 2 and m_index + 2 < m_polymer->size();
615 }
616 
617 float monomer::phi() const
618 {
619  float result = 360;
620 
621  if (m_index > 0)
622  {
623  auto &prev = m_polymer->operator[](m_index - 1);
624  if (prev.m_seq_id + 1 == m_seq_id)
625  {
626  auto a1 = prev.C();
627  auto a2 = N();
628  auto a3 = CAlpha();
629  auto a4 = C();
630 
631  if (a1 and a2 and a3 and a4)
632  result = dihedral_angle(a1.get_location(), a2.get_location(), a3.get_location(), a4.get_location());
633  }
634  }
635 
636  return result;
637 }
638 
639 float monomer::psi() const
640 {
641  float result = 360;
642 
643  if (m_index + 1 < m_polymer->size())
644  {
645  auto &next = m_polymer->operator[](m_index + 1);
646  if (m_seq_id + 1 == next.m_seq_id)
647  {
648  auto a1 = N();
649  auto a2 = CAlpha();
650  auto a3 = C();
651  auto a4 = next.N();
652 
653  if (a1 and a2 and a3 and a4)
654  result = dihedral_angle(a1.get_location(), a2.get_location(), a3.get_location(), a4.get_location());
655  }
656  }
657 
658  return result;
659 }
660 
661 float monomer::alpha() const
662 {
663  float result = 360;
664 
665  try
666  {
667  if (m_index >= 1 and m_index + 2 < m_polymer->size())
668  {
669  auto &prev = m_polymer->operator[](m_index - 1);
670  auto &next = m_polymer->operator[](m_index + 1);
671  auto &nextNext = m_polymer->operator[](m_index + 2);
672 
673  result = static_cast<float>(dihedral_angle(prev.CAlpha().get_location(), CAlpha().get_location(), next.CAlpha().get_location(), nextNext.CAlpha().get_location()));
674  }
675  }
676  catch (const std::exception &ex)
677  {
678  if (VERBOSE > 0)
679  std::cerr << ex.what() << std::endl;
680  }
681 
682  return result;
683 }
684 
685 float monomer::kappa() const
686 {
687  float result = 360;
688 
689  try
690  {
691  if (m_index >= 2 and m_index + 2 < m_polymer->size())
692  {
693  auto &prevPrev = m_polymer->operator[](m_index - 2);
694  auto &nextNext = m_polymer->operator[](m_index + 2);
695 
696  if (prevPrev.m_seq_id + 4 == nextNext.m_seq_id)
697  {
698  double ckap = cosinus_angle(CAlpha().get_location(), prevPrev.CAlpha().get_location(), nextNext.CAlpha().get_location(), CAlpha().get_location());
699  double skap = std::sqrt(1 - ckap * ckap);
700  result = static_cast<float>(std::atan2(skap, ckap) * 180 / kPI);
701  }
702  }
703  }
704  catch (const std::exception &ex)
705  {
706  if (VERBOSE > 0)
707  std::cerr << "When trying to calculate kappa for " << m_asym_id << ':' << m_seq_id << ": "
708  << ex.what() << std::endl;
709  }
710 
711  return result;
712 }
713 
714 float monomer::tco() const
715 {
716  float result = 0.0;
717 
718  try
719  {
720  if (m_index > 0)
721  {
722  auto &prev = m_polymer->operator[](m_index - 1);
723  if (prev.m_seq_id + 1 == m_seq_id)
724  result = static_cast<float>(cosinus_angle(C().get_location(), O().get_location(), prev.C().get_location(), prev.O().get_location()));
725  }
726  }
727  catch (const std::exception &ex)
728  {
729  if (VERBOSE > 0)
730  std::cerr << "When trying to calculate tco for " << get_asym_id() << ':' << get_seq_id() << ": "
731  << ex.what() << std::endl;
732  }
733 
734  return result;
735 }
736 
737 float monomer::omega() const
738 {
739  float result = 360;
740 
741  try
742  {
743  if (not is_last_in_chain())
744  result = omega(*this, m_polymer->operator[](m_index + 1));
745  }
746  catch (const std::exception &ex)
747  {
748  if (VERBOSE > 0)
749  std::cerr << "When trying to calculate omega for " << get_asym_id() << ':' << get_seq_id() << ": "
750  << ex.what() << std::endl;
751  }
752 
753  return result;
754 }
755 
756 const std::map<std::string, std::vector<std::string>> kChiAtomsMap = {
757  {"ASP", {"CG", "OD1"}},
758  {"ASN", {"CG", "OD1"}},
759  {"ARG", {"CG", "CD", "NE", "CZ"}},
760  {"HIS", {"CG", "ND1"}},
761  {"GLN", {"CG", "CD", "OE1"}},
762  {"GLU", {"CG", "CD", "OE1"}},
763  {"SER", {"OG"}},
764  {"THR", {"OG1"}},
765  {"LYS", {"CG", "CD", "CE", "NZ"}},
766  {"TYR", {"CG", "CD1"}},
767  {"PHE", {"CG", "CD1"}},
768  {"LEU", {"CG", "CD1"}},
769  {"TRP", {"CG", "CD1"}},
770  {"CYS", {"SG"}},
771  {"ILE", {"CG1", "CD1"}},
772  {"MET", {"CG", "SD", "CE"}},
773  {"MSE", {"CG", "SE", "CE"}},
774  {"PRO", {"CG", "CD"}},
775  {"VAL", {"CG1"}}};
776 
777 size_t monomer::nr_of_chis() const
778 {
779  size_t result = 0;
780 
781  auto i = kChiAtomsMap.find(m_compound_id);
782  if (i != kChiAtomsMap.end())
783  result = i->second.size();
784 
785  return result;
786 }
787 
788 float monomer::chi(size_t nr) const
789 {
790  float result = 0;
791 
792  try
793  {
794  auto i = kChiAtomsMap.find(m_compound_id);
795  if (i != kChiAtomsMap.end() and nr < i->second.size())
796  {
797  std::vector<std::string> atoms{"N", "CA", "CB"};
798 
799  atoms.insert(atoms.end(), i->second.begin(), i->second.end());
800 
801  // in case we have a positive chiral volume we need to swap atoms
802  if (chiral_volume() > 0)
803  {
804  if (m_compound_id == "LEU")
805  atoms.back() = "CD2";
806  if (m_compound_id == "VAL")
807  atoms.back() = "CG2";
808  }
809 
810  result = static_cast<float>(dihedral_angle(
811  get_atom_by_atom_id(atoms[nr + 0]).get_location(),
812  get_atom_by_atom_id(atoms[nr + 1]).get_location(),
813  get_atom_by_atom_id(atoms[nr + 2]).get_location(),
814  get_atom_by_atom_id(atoms[nr + 3]).get_location()));
815  }
816  }
817  catch (const std::exception &e)
818  {
819  if (VERBOSE > 0)
820  std::cerr << e.what() << std::endl;
821  result = 0;
822  }
823 
824  return result;
825 }
826 
827 bool monomer::is_cis() const
828 {
829  bool result = false;
830 
831  if (m_index + 1 < m_polymer->size())
832  {
833  auto &next = m_polymer->operator[](m_index + 1);
834 
835  result = monomer::is_cis(*this, next);
836  }
837 
838  return result;
839 }
840 
841 bool monomer::is_complete() const
842 {
843  int seen = 0;
844  for (auto &a : m_atoms)
845  {
846  if (a.get_label_atom_id() == "CA")
847  seen |= 1;
848  else if (a.get_label_atom_id() == "C")
849  seen |= 2;
850  else if (a.get_label_atom_id() == "N")
851  seen |= 4;
852  else if (a.get_label_atom_id() == "O")
853  seen |= 8;
854  // else if (a.get_label_atom_id() == "OXT") seen |= 16;
855  }
856  return seen == 15;
857 }
858 
859 bool monomer::has_alternate_backbone_atoms() const
860 {
861  bool result = false;
862 
863  for (auto &a : m_atoms)
864  {
865  if (not a.is_alternate())
866  continue;
867 
868  auto atom_id = a.get_label_atom_id();
869  if (atom_id == "CA" or atom_id == "C" or atom_id == "N" or atom_id == "O")
870  {
871  result = true;
872  break;
873  }
874  }
875 
876  return result;
877 }
878 
879 float monomer::chiral_volume() const
880 {
881  float result = 0;
882 
883  if (m_compound_id == "LEU")
884  {
885  auto centre = get_atom_by_atom_id("CG");
886  auto atom1 = get_atom_by_atom_id("CB");
887  auto atom2 = get_atom_by_atom_id("CD1");
888  auto atom3 = get_atom_by_atom_id("CD2");
889 
890  result = dot_product(atom1.get_location() - centre.get_location(),
891  cross_product(atom2.get_location() - centre.get_location(), atom3.get_location() - centre.get_location()));
892  }
893  else if (m_compound_id == "VAL")
894  {
895  auto centre = get_atom_by_atom_id("CB");
896  auto atom1 = get_atom_by_atom_id("CA");
897  auto atom2 = get_atom_by_atom_id("CG1");
898  auto atom3 = get_atom_by_atom_id("CG2");
899 
900  result = dot_product(atom1.get_location() - centre.get_location(),
901  cross_product(atom2.get_location() - centre.get_location(), atom3.get_location() - centre.get_location()));
902  }
903 
904  return result;
905 }
906 
907 bool monomer::are_bonded(const monomer &a, const monomer &b, float errorMargin)
908 {
909  bool result = false;
910 
911  try
912  {
913  point atoms[4] = {
914  a.get_atom_by_atom_id("CA").get_location(),
915  a.get_atom_by_atom_id("C").get_location(),
916  b.get_atom_by_atom_id("N").get_location(),
917  b.get_atom_by_atom_id("CA").get_location()};
918 
919  auto distanceCACA = distance(atoms[0], atoms[3]);
920  double omega = dihedral_angle(atoms[0], atoms[1], atoms[2], atoms[3]);
921 
922  bool cis = std::abs(omega) <= 30.0;
923  float maxCACADistance = cis ? 3.0f : 3.8f;
924 
925  result = std::abs(distanceCACA - maxCACADistance) < errorMargin;
926  }
927  catch (...)
928  {
929  }
930 
931  return result;
932 }
933 
934 float monomer::omega(const monomer &a, const monomer &b)
935 {
936  float result = 360;
937 
938  auto a1 = a.get_atom_by_atom_id("CA");
939  auto a2 = a.get_atom_by_atom_id("C");
940  auto a3 = b.get_atom_by_atom_id("N");
941  auto a4 = b.get_atom_by_atom_id("CA");
942 
943  if (a1 and a2 and a3 and a4)
944  result = static_cast<float>(dihedral_angle(
945  a1.get_location(),
946  a2.get_location(),
947  a3.get_location(),
948  a4.get_location()));
949 
950  return result;
951 }
952 
953 bool monomer::is_cis(const monomer &a, const monomer &b)
954 {
955  return std::abs(omega(a, b)) < 30.0f;
956 }
957 
958 // --------------------------------------------------------------------
959 // polymer
960 
961 polymer::polymer(structure &s, const std::string &entityID, const std::string &asym_id, const std::string &auth_asym_id)
962  : m_structure(const_cast<structure *>(&s))
963  , m_entity_id(entityID)
964  , m_asym_id(asym_id)
965  , m_auth_asym_id(auth_asym_id)
966 {
967  using namespace cif::literals;
968 
969  std::map<size_t, size_t> ix;
970 
971  auto &poly_seq_scheme = s.get_datablock()["pdbx_poly_seq_scheme"];
972  reserve(poly_seq_scheme.size());
973 
974  for (auto r : poly_seq_scheme.find("asym_id"_key == asym_id))
975  {
976  int seqID;
977  std::optional<int> pdbSeqNum;
978  std::string compoundID, authSeqID, pdbInsCode;
979  cif::tie(seqID, authSeqID, compoundID, pdbInsCode, pdbSeqNum) = r.get("seq_id", "auth_seq_num", "mon_id", "pdb_ins_code", "pdb_seq_num");
980 
981  if (authSeqID.empty() and pdbSeqNum.has_value())
982  authSeqID = std::to_string(*pdbSeqNum);
983 
984  size_t index = size();
985 
986  // store only the first
987  if (not ix.count(seqID))
988  {
989  ix[seqID] = index;
990  emplace_back(*this, index, seqID, authSeqID, pdbInsCode, compoundID);
991  }
992  else if (VERBOSE > 0)
993  {
994  monomer m{*this, index, seqID, authSeqID, pdbInsCode, compoundID};
995  std::cerr << "Dropping alternate residue " << m << std::endl;
996  }
997  }
998 }
999 
1000 // std::string polymer::chainID() const
1001 // {
1002 // return mPolySeq.front()["pdb_strand_id"].as<std::string>();
1003 // }
1004 
1005 // monomer &polymer::getBySeqID(int seqID)
1006 // {
1007 // for (auto &m : *this)
1008 // if (m.get_seq_id() == seqID)
1009 // return m;
1010 
1011 // throw std::runtime_error("monomer with seqID " + std::to_string(seqID) + " not found in polymer " + m_asym_id);
1012 // }
1013 
1014 // const monomer &polymer::getBySeqID(int seqID) const
1015 // {
1016 // for (auto &m : *this)
1017 // if (m.get_seq_id() == seqID)
1018 // return m;
1019 
1020 // throw std::runtime_error("monomer with seqID " + std::to_string(seqID) + " not found in polymer " + m_asym_id);
1021 // }
1022 
1023 // int polymer::Distance(const monomer &a, const monomer &b) const
1024 // {
1025 // int result = std::numeric_limits<int>::max();
1026 
1027 // if (a.get_asym_id() == b.get_asym_id())
1028 // {
1029 // int ixa = std::numeric_limits<int>::max(), ixb = std::numeric_limits<int>::max();
1030 
1031 // int ix = 0, f = 0;
1032 // for (auto &m : *this)
1033 // {
1034 // if (m.get_seq_id() == a.get_seq_id())
1035 // ixa = ix, ++f;
1036 // if (m.get_seq_id() == b.get_seq_id())
1037 // ixb = ix, ++f;
1038 // if (f == 2)
1039 // {
1040 // result = std::abs(ixa - ixb);
1041 // break;
1042 // }
1043 // }
1044 // }
1045 
1046 // return result;
1047 // }
1048 
1049 // --------------------------------------------------------------------
1050 
1051 sugar::sugar(branch &branch, const std::string &compoundID,
1052  const std::string &asym_id, int authSeqID)
1053  : residue(branch.get_structure(), compoundID, asym_id, 0, asym_id, std::to_string(authSeqID), "")
1054  , m_branch(&branch)
1055 {
1056 }
1057 
1058 sugar::sugar(sugar &&rhs)
1059  : residue(std::forward<residue>(rhs))
1060  , m_branch(rhs.m_branch)
1061 {
1062 
1063 }
1064 
1065 sugar &sugar::operator=(sugar &&rhs)
1066 {
1067  if (this != &rhs)
1068  {
1069  residue::operator=(std::forward<residue>(rhs));
1070  m_branch = rhs.m_branch;
1071  }
1072 
1073  return *this;
1074 }
1075 
1076 // bool sugar::hasLinkedSugarAtLeavingO(int leavingO) const
1077 // {
1078 // return false;
1079 // }
1080 
1081 // sugar &sugar::operator[](int leavingO)
1082 // {
1083 // throw std::logic_error("not implemented");
1084 // }
1085 
1086 // const sugar &sugar::operator[](int leavingO) const
1087 // {
1088 // throw std::logic_error("not implemented");
1089 // }
1090 
1091 std::string sugar::name() const
1092 {
1093  std::string result;
1094 
1095  if (m_compound_id == "MAN")
1096  result += "alpha-D-mannopyranose";
1097  else if (m_compound_id == "BMA")
1098  result += "beta-D-mannopyranose";
1099  else if (m_compound_id == "NAG")
1100  result += "2-acetamido-2-deoxy-beta-D-glucopyranose";
1101  else if (m_compound_id == "NDG")
1102  result += "2-acetamido-2-deoxy-alpha-D-glucopyranose";
1103  else if (m_compound_id == "FUC")
1104  result += "alpha-L-fucopyranose";
1105  else if (m_compound_id == "FUL")
1106  result += "beta-L-fucopyranose";
1107  else
1108  {
1109  auto compound = compound_factory::instance().create(m_compound_id);
1110  if (compound)
1111  result += compound->name();
1112  else
1113  result += m_compound_id;
1114  }
1115 
1116  return result;
1117 }
1118 
1119 cif::mm::atom sugar::add_atom(row_initializer atom_info)
1120 {
1121  auto &db = m_structure->get_datablock();
1122  auto &atom_site = db["atom_site"];
1123 
1124  auto atom_id = atom_site.get_unique_id("");
1125 
1126  atom_info.set_value({"group_PDB", "HETATM"});
1127  atom_info.set_value({"id", atom_id});
1128  atom_info.set_value({"label_entity_id", m_branch->get_entity_id()});
1129  atom_info.set_value({"label_asym_id", m_branch->get_asym_id()});
1130  atom_info.set_value({"label_comp_id", m_compound_id});
1131  atom_info.set_value({"label_seq_id", "."});
1132  atom_info.set_value({"label_alt_id", "."});
1133  atom_info.set_value({"auth_asym_id", m_branch->get_asym_id()});
1134  atom_info.set_value({"auth_comp_id", m_compound_id});
1135  atom_info.set_value({"auth_seq_id", m_auth_seq_id});
1136  atom_info.set_value({"occupancy", 1.0, 2});
1137  atom_info.set_value({"B_iso_or_equiv", 30.0, 2});
1138  atom_info.set_value({"pdbx_PDB_model_num", 1});
1139 
1140  auto row = atom_site.emplace(std::move(atom_info));
1141  auto result = m_structure->emplace_atom(db, row);
1142 
1143  residue::add_atom(result);
1144 
1145  return result;
1146 }
1147 
1148 branch::branch(structure &structure, const std::string &asym_id, const std::string &entity_id)
1149  : m_structure(&structure)
1150  , m_asym_id(asym_id)
1151  , m_entity_id(entity_id)
1152 {
1153  using namespace literals;
1154 
1155  auto &db = structure.get_datablock();
1156  auto &struct_asym = db["struct_asym"];
1157  auto &branch_scheme = db["pdbx_branch_scheme"];
1158  auto &branch_link = db["pdbx_entity_branch_link"];
1159 
1160  for (const auto &asym_entity_id : struct_asym.find<std::string>("id"_key == asym_id, "entity_id"))
1161  {
1162  for (const auto &[comp_id, num] : branch_scheme.find<std::string, int>(
1163  "asym_id"_key == asym_id, "mon_id", "pdb_seq_num"))
1164  {
1165  emplace_back(*this, comp_id, asym_id, num);
1166  }
1167 
1168  for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
1169  "entity_id"_key == asym_entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
1170  {
1171  // if (not iequals(atom1, "c1"))
1172  // throw std::runtime_error("invalid pdbx_entity_branch_link");
1173 
1174  auto &s1 = at(num1 - 1);
1175  auto &s2 = at(num2 - 1);
1176 
1177  s1.set_link(s2.get_atom_by_atom_id(atom2));
1178  }
1179 
1180  break;
1181  }
1182 }
1183 
1184 void branch::link_atoms()
1185 {
1186  if (not empty())
1187  {
1188  using namespace literals;
1189 
1190  auto &db = m_structure->get_datablock();
1191  auto &branch_link = db["pdbx_entity_branch_link"];
1192 
1193  auto entity_id = front().get_entity_id();
1194 
1195  for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
1196  "entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
1197  {
1198  // if (not iequals(atom1, "c1"))
1199  // throw std::runtime_error("invalid pdbx_entity_branch_link");
1200 
1201  auto &s1 = at(num1 - 1);
1202  auto &s2 = at(num2 - 1);
1203 
1204  s1.set_link(s2.get_atom_by_atom_id(atom2));
1205  }
1206  }
1207 }
1208 
1209 sugar &branch::get_sugar_by_num(int nr)
1210 {
1211  auto i = find_if(begin(), end(), [nr](const sugar &s) { return s.num() == nr; });
1212  if (i == end())
1213  throw std::out_of_range("Sugar with num " + std::to_string(nr) + " not found in branch " + m_asym_id);
1214 
1215  return *i;
1216 }
1217 
1218 std::string branch::name() const
1219 {
1220  return empty() ? "" : name(front());
1221 }
1222 
1223 sugar &branch::construct_sugar(const std::string &compound_id)
1224 {
1225  auto &db = m_structure->get_datablock();
1226 
1227  auto compound = compound_factory::instance().create(compound_id);
1228  if (compound == nullptr)
1229  throw std::runtime_error("Trying to insert unknown compound " + compound_id + " (not found in CCD)");
1230 
1231  auto &chemComp = db["chem_comp"];
1232  auto r = chemComp.find(key("id") == compound_id);
1233  if (r.empty())
1234  {
1235  chemComp.emplace({
1236  {"id", compound_id},
1237  {"name", compound->name()},
1238  {"formula", compound->formula()},
1239  {"formula_weight", compound->formula_weight()},
1240  {"type", compound->type()}});
1241  }
1242 
1243  sugar &result = emplace_back(*this, compound_id, m_asym_id, static_cast<int>(size() + 1));
1244 
1245  db["pdbx_branch_scheme"].emplace({
1246  {"asym_id", result.get_asym_id()},
1247  {"entity_id", result.get_entity_id()},
1248  {"num", result.num()},
1249  {"mon_id", result.get_compound_id()},
1250 
1251  {"pdb_asym_id", result.get_asym_id()},
1252  {"pdb_seq_num", result.num()},
1253  {"pdb_mon_id", result.get_compound_id()},
1254 
1255  {"auth_asym_id", result.get_auth_asym_id()},
1256  {"auth_mon_id", result.get_compound_id()},
1257  {"auth_seq_num", result.get_auth_seq_id()},
1258 
1259  {"hetero", "n"}
1260  });
1261 
1262  return result;
1263 }
1264 
1265 sugar &branch::construct_sugar(const std::string &compound_id, const std::string &atom_id,
1266  int linked_sugar_nr, const std::string &linked_atom_id)
1267 {
1268  auto &result = construct_sugar(compound_id);
1269 
1270  auto &linked = get_sugar_by_num(linked_sugar_nr);
1271  result.set_link(linked.get_atom_by_atom_id(linked_atom_id));
1272 
1273  auto &db = m_structure->get_datablock();
1274 
1275  auto &pdbx_entity_branch_link = db["pdbx_entity_branch_link"];
1276  auto linkID = pdbx_entity_branch_link.get_unique_id("");
1277 
1278  db["pdbx_entity_branch_link"].emplace({
1279  { "link_id", linkID },
1280  { "entity_id", get_entity_id() },
1281  { "entity_branch_list_num_1", result.num() },
1282  { "comp_id_1", compound_id },
1283  { "atom_id_1", atom_id },
1284  { "leaving_atom_id_1", "O1" },
1285  { "entity_branch_list_num_2", linked.num() },
1286  { "comp_id_2", linked.get_compound_id() },
1287  { "atom_id_2", linked_atom_id },
1288  { "leaving_atom_id_2", "." },
1289  { "value_order", "sing" }
1290  });
1291 
1292  return result;
1293 }
1294 
1295 std::string branch::name(const sugar &s) const
1296 {
1297  using namespace literals;
1298 
1299  std::string result;
1300 
1301  for (auto &sn : *this)
1302  {
1303  if (not sn.get_link() or sn.get_link().get_auth_seq_id() != s.get_auth_seq_id())
1304  continue;
1305 
1306  auto n = name(sn) + "-(1-" + sn.get_link().get_label_atom_id().substr(1) + ')';
1307 
1308  result = result.empty() ? n : result + "-[" + n + ']';
1309  }
1310 
1311  if (not result.empty() and result.back() != ']')
1312  result += '-';
1313 
1314  return result + s.name();
1315 }
1316 
1317 float branch::weight() const
1318 {
1319  return std::accumulate(begin(), end(), 0.f, [](float sum, const sugar &s)
1320  {
1321  auto compound = compound_factory::instance().create(s.get_compound_id());
1322  if (compound)
1323  sum += compound->formula_weight();
1324  return sum; });
1325 }
1326 
1327 // --------------------------------------------------------------------
1328 // structure
1329 
1330 structure::structure(file &p, size_t modelNr, StructureOpenOptions options)
1331  : structure(p.front(), modelNr, options)
1332 {
1333 }
1334 
1335 structure::structure(datablock &db, size_t modelNr, StructureOpenOptions options)
1336  : m_db(db)
1337  , m_model_nr(modelNr)
1338 {
1339  auto &atomCat = db["atom_site"];
1340 
1341  load_atoms_for_model(options);
1342 
1343  // Check to see if we should actually load another model?
1344  if (m_atoms.empty() and m_model_nr == 1)
1345  {
1346  std::optional<size_t> model_nr;
1347  cif::tie(model_nr) = atomCat.front().get("pdbx_PDB_model_num");
1348  if (model_nr and *model_nr != m_model_nr)
1349  {
1350  if (VERBOSE > 0)
1351  std::cerr << "No atoms loaded for model 1, trying model " << *model_nr << std::endl;
1352  m_model_nr = *model_nr;
1353  load_atoms_for_model(options);
1354  }
1355  }
1356 
1357  if (m_atoms.empty())
1358  {
1359  if (VERBOSE >= 0)
1360  std::cerr << "Warning: no atoms loaded" << std::endl;
1361  }
1362  else
1363  load_data();
1364 }
1365 
1366 void structure::load_atoms_for_model(StructureOpenOptions options)
1367 {
1368  auto &atomCat = m_db["atom_site"];
1369 
1370  for (const auto &a : atomCat)
1371  {
1372  std::string id, type_symbol;
1373  std::optional<size_t> model_nr;
1374 
1375  cif::tie(id, type_symbol, model_nr) = a.get("id", "type_symbol", "pdbx_PDB_model_num");
1376 
1377  if (model_nr and *model_nr != m_model_nr)
1378  continue;
1379 
1380  if ((options bitand StructureOpenOptions::SkipHydrogen) and (type_symbol == "H" or type_symbol == "D"))
1381  continue;
1382 
1383  emplace_atom(std::make_shared<atom::atom_impl>(m_db, id));
1384  }
1385 }
1386 
1387 // structure::structure(const structure &s)
1388 // : m_db(s.m_db)
1389 // , m_model_nr(s.m_model_nr)
1390 // {
1391 // m_atoms.reserve(s.m_atoms.size());
1392 // for (auto &atom : s.m_atoms)
1393 // emplace_atom(atom.clone());
1394 
1395 // load_data();
1396 // }
1397 
1398 // structure::~structure()
1399 // {
1400 // }
1401 
1402 void structure::load_data()
1403 {
1404  auto &polySeqScheme = m_db["pdbx_poly_seq_scheme"];
1405 
1406  for (const auto &[asym_id, auth_asym_id, entityID] : polySeqScheme.rows<std::string,std::string,std::string>("asym_id", "pdb_strand_id", "entity_id"))
1407  {
1408  if (m_polymers.empty() or m_polymers.back().get_asym_id() != asym_id or m_polymers.back().get_entity_id() != entityID)
1409  m_polymers.emplace_back(*this, entityID, asym_id, auth_asym_id);
1410  }
1411 
1412  auto &branchScheme = m_db["pdbx_branch_scheme"];
1413 
1414  for (const auto &[asym_id, entity_id] : branchScheme.rows<std::string,std::string>("asym_id", "entity_id"))
1415  {
1416  if (m_branches.empty() or m_branches.back().get_asym_id() != asym_id)
1417  m_branches.emplace_back(*this, asym_id, entity_id);
1418  }
1419 
1420  auto &nonPolyScheme = m_db["pdbx_nonpoly_scheme"];
1421 
1422  for (const auto&[asym_id, monID, pdbStrandID, pdbSeqNum, pdbInsCode] :
1423  nonPolyScheme.rows<std::string,std::string,std::string,std::string,std::string>("asym_id", "mon_id", "pdb_strand_id", "pdb_seq_num", "pdb_ins_code"))
1424  m_non_polymers.emplace_back(*this, monID, asym_id, 0, pdbStrandID, pdbSeqNum, pdbInsCode);
1425 
1426  // place atoms in residues
1427 
1428  using key_type = std::tuple<std::string, int, std::string>;
1429  std::map<key_type, residue *> resMap;
1430 
1431  for (auto &poly : m_polymers)
1432  {
1433  for (auto &res : poly)
1434  resMap[{res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id()}] = &res;
1435  }
1436 
1437  for (auto &res : m_non_polymers)
1438  resMap[{res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id()}] = &res;
1439 
1440  std::set<std::string> sugars;
1441  for (auto &branch : m_branches)
1442  {
1443  for (auto &sugar : branch)
1444  {
1445  resMap[{sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_auth_seq_id()}] = &sugar;
1446  sugars.insert(sugar.get_compound_id());
1447  }
1448  }
1449 
1450  for (auto &atom : m_atoms)
1451  {
1452  key_type k(atom.get_label_asym_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
1453  auto ri = resMap.find(k);
1454 
1455  if (ri == resMap.end())
1456  {
1457  if (VERBOSE > 0)
1458  std::cerr << "Missing residue for atom " << atom << std::endl;
1459 
1460  // see if it might match a non poly
1461  for (auto &res : m_non_polymers)
1462  {
1463  if (res.get_asym_id() != atom.get_label_asym_id())
1464  continue;
1465 
1466  res.add_atom(atom);
1467  break;
1468  }
1469 
1470  continue;
1471  }
1472 
1473  ri->second->add_atom(atom);
1474  }
1475 
1476  // what the ...
1477  m_branches.erase(std::remove_if(m_branches.begin(), m_branches.end(), [](const branch &b) { return b.empty(); }), m_branches.end());
1478 
1479  for (auto &branch : m_branches)
1480  branch.link_atoms();
1481 }
1482 
1483 EntityType structure::get_entity_type_for_entity_id(const std::string entityID) const
1484 {
1485  using namespace literals;
1486 
1487  auto &entity = m_db["entity"];
1488  auto entity_type = entity.find1<std::string>("id"_key == entityID, "type");
1489 
1490  EntityType result;
1491 
1492  if (iequals(entity_type, "polymer"))
1493  result = EntityType::polymer;
1494  else if (iequals(entity_type, "non-polymer"))
1495  result = EntityType::NonPolymer;
1496  else if (iequals(entity_type, "macrolide"))
1497  result = EntityType::Macrolide;
1498  else if (iequals(entity_type, "water"))
1499  result = EntityType::Water;
1500  else if (iequals(entity_type, "branched"))
1501  result = EntityType::Branched;
1502  else
1503  throw std::runtime_error("Unknown entity type " + entity_type);
1504 
1505  return result;
1506 }
1507 
1508 EntityType structure::get_entity_type_for_asym_id(const std::string asym_id) const
1509 {
1510  using namespace literals;
1511 
1512  auto &struct_asym = m_db["struct_asym"];
1513  auto entityID = struct_asym.find1<std::string>("id"_key == asym_id, "entity_id");
1514 
1515  return get_entity_type_for_entity_id(entityID);
1516 }
1517 
1518 // std::vector<atom> structure::waters() const
1519 // {
1520 // using namespace literals;
1521 
1522 // std::vector<atom> result;
1523 
1524 // auto &db = datablock();
1525 
1526 // // Get the entity id for water. Watch out, structure may not have water at all
1527 // auto &entityCat = db["entity"];
1528 // for (const auto &[waterEntityID] : entityCat.find<std::string>("type"_key == "water", "id"))
1529 // {
1530 // for (auto &a : m_atoms)
1531 // {
1532 // if (a.get_property("label_entity_id") == waterEntityID)
1533 // result.push_back(a);
1534 // }
1535 
1536 // break;
1537 // }
1538 
1539 // return result;
1540 // }
1541 
1542 atom structure::get_atom_by_id(const std::string &id) const
1543 {
1544  assert(m_atoms.size() == m_atom_index.size());
1545 
1546  int L = 0, R = static_cast<int>(m_atoms.size() - 1);
1547  while (L <= R)
1548  {
1549  int i = (L + R) / 2;
1550 
1551  const atom &atom = m_atoms[m_atom_index[i]];
1552 
1553  int d = atom.id().compare(id);
1554 
1555  if (d == 0)
1556  return atom;
1557 
1558  if (d < 0)
1559  L = i + 1;
1560  else
1561  R = i - 1;
1562  }
1563 
1564  throw std::out_of_range("Could not find atom with id " + id);
1565 }
1566 
1567 atom structure::get_atom_by_label(const std::string &atom_id, const std::string &asym_id, const std::string &compID, int seqID, const std::string &altID)
1568 {
1569  for (auto &a : m_atoms)
1570  {
1571  if (a.get_label_atom_id() == atom_id and
1572  a.get_label_asym_id() == asym_id and
1573  a.get_label_comp_id() == compID and
1574  a.get_label_seq_id() == seqID and
1575  a.get_label_alt_id() == altID)
1576  {
1577  return a;
1578  }
1579  }
1580 
1581  throw std::out_of_range("Could not find atom with specified label");
1582 }
1583 
1584 atom structure::get_atom_by_position(point p) const
1585 {
1586  double dist = std::numeric_limits<double>::max();
1587  size_t index = std::numeric_limits<size_t>::max();
1588 
1589  for (size_t i = 0; i < m_atoms.size(); ++i)
1590  {
1591  auto &a = m_atoms.at(i);
1592 
1593  auto d = distance(a.get_location(), p);
1594  if (d < dist)
1595  {
1596  dist = d;
1597  index = i;
1598  }
1599  }
1600 
1601  if (index < m_atoms.size())
1602  return m_atoms.at(index);
1603 
1604  return {};
1605 }
1606 
1607 atom structure::get_atom_by_position_and_type(point p, std::string_view type, std::string_view res_type) const
1608 {
1609  double dist = std::numeric_limits<double>::max();
1610  size_t index = std::numeric_limits<size_t>::max();
1611 
1612  for (size_t i = 0; i < m_atoms.size(); ++i)
1613  {
1614  auto &a = m_atoms.at(i);
1615 
1616  if (a.get_label_comp_id() != res_type)
1617  continue;
1618 
1619  if (a.get_label_atom_id() != type)
1620  continue;
1621 
1622  auto d = distance(a.get_location(), p);
1623  if (dist > d)
1624  {
1625  dist = d;
1626  index = i;
1627  }
1628  }
1629 
1630  if (index < m_atoms.size())
1631  return m_atoms.at(index);
1632 
1633  return {};
1634 }
1635 
1636 polymer &structure::get_polymer_by_asym_id(const std::string &asym_id)
1637 {
1638  for (auto &poly : m_polymers)
1639  {
1640  if (poly.get_asym_id() != asym_id)
1641  continue;
1642 
1643  return poly;
1644  }
1645 
1646  throw std::runtime_error("polymer with asym id " + asym_id + " not found");
1647 }
1648 
1649 residue &structure::create_residue(const std::vector<atom> &atoms)
1650 {
1651  return m_non_polymers.emplace_back(*this, atoms);
1652 }
1653 
1654 residue &structure::get_residue(const std::string &asym_id, int seqID, const std::string &authSeqID)
1655 {
1656  if (seqID == 0)
1657  {
1658  for (auto &res : m_non_polymers)
1659  {
1660  if (res.get_asym_id() == asym_id and (authSeqID.empty() or res.get_auth_seq_id() == authSeqID))
1661  return res;
1662  }
1663  }
1664 
1665  for (auto &poly : m_polymers)
1666  {
1667  if (poly.get_asym_id() != asym_id)
1668  continue;
1669 
1670  for (auto &res : poly)
1671  {
1672  if (res.get_seq_id() == seqID)
1673  return res;
1674  }
1675  }
1676 
1677  for (auto &branch : m_branches)
1678  {
1679  if (branch.get_asym_id() != asym_id)
1680  continue;
1681 
1682  for (auto &sugar : branch)
1683  {
1684  if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID)
1685  return sugar;
1686  }
1687  }
1688 
1689  std::string desc = asym_id;
1690 
1691  if (seqID != 0)
1692  desc += "/" + std::to_string(seqID);
1693 
1694  if (not authSeqID.empty())
1695  desc += "-" + authSeqID;
1696 
1697  throw std::out_of_range("Could not find residue " + desc);
1698 }
1699 
1700 residue &structure::get_residue(const std::string &asym_id, const std::string &compID, int seqID, const std::string &authSeqID)
1701 {
1702  if (seqID == 0)
1703  {
1704  for (auto &res : m_non_polymers)
1705  {
1706  if (res.get_asym_id() == asym_id and res.get_auth_seq_id() == authSeqID and res.get_compound_id() == compID)
1707  return res;
1708  }
1709  }
1710 
1711  for (auto &poly : m_polymers)
1712  {
1713  if (poly.get_asym_id() != asym_id)
1714  continue;
1715 
1716  for (auto &res : poly)
1717  {
1718  if (res.get_seq_id() == seqID and res.get_compound_id() == compID)
1719  return res;
1720  }
1721  }
1722 
1723  for (auto &branch : m_branches)
1724  {
1725  if (branch.get_asym_id() != asym_id)
1726  continue;
1727 
1728  for (auto &sugar : branch)
1729  {
1730  if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == authSeqID and sugar.get_compound_id() == compID)
1731  return sugar;
1732  }
1733  }
1734 
1735  std::string desc = asym_id;
1736 
1737  if (seqID != 0)
1738  desc += "/" + std::to_string(seqID);
1739 
1740  if (not authSeqID.empty())
1741  desc += "-" + authSeqID;
1742 
1743  throw std::out_of_range("Could not find residue " + desc + " of type " + compID);
1744 }
1745 
1746 branch &structure::get_branch_by_asym_id(const std::string &asym_id)
1747 {
1748  for (auto &branch : m_branches)
1749  {
1750  if (branch.get_asym_id() == asym_id)
1751  return branch;
1752  }
1753 
1754  throw std::runtime_error("branch not found for asym id " + asym_id);
1755 }
1756 
1757 std::string structure::insert_compound(const std::string &compoundID, bool is_entity)
1758 {
1759  using namespace literals;
1760 
1761  auto compound = compound_factory::instance().create(compoundID);
1762  if (compound == nullptr)
1763  throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCD)");
1764 
1765  auto &chemComp = m_db["chem_comp"];
1766  auto r = chemComp.find(key("id") == compoundID);
1767  if (r.empty())
1768  {
1769  chemComp.emplace({
1770  {"id", compoundID},
1771  {"name", compound->name()},
1772  {"formula", compound->formula()},
1773  {"formula_weight", compound->formula_weight()},
1774  {"type", compound->type()}});
1775  }
1776 
1777  std::string entity_id;
1778 
1779  if (is_entity)
1780  {
1781  auto &pdbxEntityNonpoly = m_db["pdbx_entity_nonpoly"];
1782 
1783  entity_id = pdbxEntityNonpoly.find_first<std::string>("comp_id"_key == compoundID, "entity_id");
1784 
1785  if (entity_id.empty())
1786  {
1787  auto &entity = m_db["entity"];
1788  entity_id = entity.get_unique_id("");
1789 
1790  entity.emplace({
1791  {"id", entity_id},
1792  {"type", "non-polymer"},
1793  {"pdbx_description", compound->name()},
1794  {"formula_weight", compound->formula_weight()}});
1795 
1796  pdbxEntityNonpoly.emplace({
1797  {"entity_id", entity_id},
1798  {"name", compound->name()},
1799  {"comp_id", compoundID}});
1800  }
1801  }
1802 
1803  return entity_id;
1804 }
1805 
1806 // --------------------------------------------------------------------
1807 
1808 atom &structure::emplace_atom(atom &&atom)
1809 {
1810  int L = 0, R = static_cast<int>(m_atom_index.size() - 1);
1811  while (L <= R)
1812  {
1813  int i = (L + R) / 2;
1814 
1815  auto &ai = m_atoms[m_atom_index[i]];
1816 
1817  int d = ai.id().compare(atom.id());
1818 
1819  if (d == 0)
1820  throw std::runtime_error("Duplicate atom ID " + atom.id());
1821 
1822  if (d < 0)
1823  L = i + 1;
1824  else
1825  R = i - 1;
1826  }
1827 
1828  if (R == -1) // msvc...
1829  m_atom_index.insert(m_atom_index.begin(), m_atoms.size());
1830  else
1831  m_atom_index.insert(m_atom_index.begin() + R + 1, m_atoms.size());
1832 
1833  // make sure the atom_type is known
1834  auto &atom_type = m_db["atom_type"];
1835  std::string symbol = atom.get_property("type_symbol");
1836 
1837  using namespace cif::literals;
1838  if (not atom_type.exists("symbol"_key == symbol))
1839  atom_type.emplace({ { "symbol", symbol } });
1840 
1841  return m_atoms.emplace_back(std::move(atom));
1842 }
1843 
1844 void structure::remove_atom(atom &a, bool removeFromResidue)
1845 {
1846  using namespace literals;
1847 
1848  auto &atomSites = m_db["atom_site"];
1849 
1850  if (removeFromResidue)
1851  {
1852  try
1853  {
1854  auto &res = get_residue(a);
1855  res.m_atoms.erase(std::remove(res.m_atoms.begin(), res.m_atoms.end(), a), res.m_atoms.end());
1856  }
1857  catch (const std::exception &ex)
1858  {
1859  if (VERBOSE > 0)
1860  std::cerr << "Error removing atom from residue: " << ex.what() << std::endl;
1861  }
1862  }
1863 
1864  atomSites.erase("id"_key == a.id());
1865 
1866  assert(m_atom_index.size() == m_atoms.size());
1867 
1868 #ifndef NDEBUG
1869  bool removed = false;
1870 #endif
1871 
1872  int L = 0, R = static_cast<int>(m_atom_index.size() - 1);
1873  while (L <= R)
1874  {
1875  int i = (L + R) / 2;
1876 
1877  const atom &atom = m_atoms[m_atom_index[i]];
1878 
1879  int d = atom.id().compare(a.id());
1880 
1881  if (d == 0)
1882  {
1883  m_atoms.erase(m_atoms.begin() + m_atom_index[i]);
1884 
1885  auto ai = m_atom_index[i];
1886  m_atom_index.erase(m_atom_index.begin() + i);
1887 
1888  for (auto &j : m_atom_index)
1889  {
1890  if (j > ai)
1891  --j;
1892  }
1893 #ifndef NDEBUG
1894  removed = true;
1895 #endif
1896  break;
1897  }
1898 
1899  if (d < 0)
1900  L = i + 1;
1901  else
1902  R = i - 1;
1903  }
1904 #ifndef NDEBUG
1905  assert(removed);
1906 #endif
1907 }
1908 
1909 void structure::swap_atoms(atom a1, atom a2)
1910 {
1911  auto &atomSites = m_db["atom_site"];
1912 
1913  try
1914  {
1915  auto r1 = atomSites.find1(key("id") == a1.id());
1916  auto r2 = atomSites.find1(key("id") == a2.id());
1917 
1918  auto l1 = r1["label_atom_id"];
1919  auto l2 = r2["label_atom_id"];
1920  l1.swap(l2);
1921 
1922  auto l3 = r1["auth_atom_id"];
1923  auto l4 = r2["auth_atom_id"];
1924  l3.swap(l4);
1925  }
1926  catch (const std::exception &ex)
1927  {
1928  std::throw_with_nested(std::runtime_error("Failed to swap atoms"));
1929  }
1930 }
1931 
1932 void structure::move_atom(atom a, point p)
1933 {
1934  a.set_location(p);
1935 }
1936 
1937 void structure::change_residue(residue &res, const std::string &newCompound,
1938  const std::vector<std::tuple<std::string, std::string>> &remappedAtoms)
1939 {
1940  using namespace literals;
1941 
1942  std::string asym_id = res.get_asym_id();
1943 
1944  const auto compound = compound_factory::instance().create(newCompound);
1945  if (not compound)
1946  throw std::runtime_error("Unknown compound " + newCompound);
1947 
1948  // First make sure the compound is already known or insert it.
1949  // And if the residue is an entity, we must make sure it exists
1950 
1951  std::string entityID;
1952  if (res.is_entity())
1953  {
1954  // create a copy of the entity first
1955  auto &entity = m_db["entity"];
1956 
1957  entityID = entity.find_first<std::string>("type"_key == "non-polymer" and "pdbx_description"_key == compound->name(), "id");
1958 
1959  if (entityID.empty())
1960  {
1961  entityID = entity.get_unique_id("");
1962  entity.emplace({{"id", entityID},
1963  {"type", "non-polymer"},
1964  {"pdbx_description", compound->name()},
1965  {"formula_weight", compound->formula_weight()}});
1966 
1967  auto &pdbxEntityNonpoly = m_db["pdbx_entity_nonpoly"];
1968  pdbxEntityNonpoly.emplace({{"entity_id", entityID},
1969  {"name", compound->name()},
1970  {"comp_id", newCompound}});
1971  }
1972 
1973  auto &pdbxNonPolyScheme = m_db["pdbx_nonpoly_scheme"];
1974  for (auto nps : pdbxNonPolyScheme.find("asym_id"_key == asym_id))
1975  {
1976  nps.assign("mon_id", newCompound, true);
1977  nps.assign("pdb_mon_id", newCompound, true);
1978  nps.assign("auth_mon_id", newCompound, true);
1979  nps.assign("entity_id", entityID, true);
1980  }
1981 
1982  // create rest
1983  auto &chemComp = m_db["chem_comp"];
1984  if (not chemComp.exists(key("id") == newCompound))
1985  {
1986  chemComp.emplace({{"id", newCompound},
1987  {"name", compound->name()},
1988  {"formula", compound->formula()},
1989  {"formula_weight", compound->formula_weight()},
1990  {"type", compound->type()}});
1991  }
1992 
1993  // update the struct_asym for the new entity
1994  m_db["struct_asym"].update_value("id"_key == asym_id, "entity_id", entityID);
1995  }
1996  else
1997  insert_compound(newCompound, false);
1998 
1999  res.set_compound_id(newCompound);
2000 
2001  auto &atomSites = m_db["atom_site"];
2002  auto atoms = res.atoms();
2003 
2004  for (const auto &[a1, a2] : remappedAtoms)
2005  {
2006  auto i = find_if(atoms.begin(), atoms.end(), [id = a1](const atom &a)
2007  { return a.get_label_atom_id() == id; });
2008  if (i == atoms.end())
2009  {
2010  if (VERBOSE >= 0)
2011  std::cerr << "Missing atom for atom ID " << a1 << std::endl;
2012  continue;
2013  }
2014 
2015  auto r = atomSites.find(key("id") == i->id());
2016 
2017  if (r.size() != 1)
2018  continue;
2019 
2020  if (a2.empty() or a2 == ".")
2021  remove_atom(*i);
2022  else if (a1 != a2)
2023  {
2024  auto ra = r.front();
2025  ra["label_atom_id"] = a2;
2026  ra["auth_atom_id"] = a2;
2027  ra["type_symbol"] = atom_type_traits(compound->get_atom_by_atom_id(a2).type_symbol).symbol();
2028  }
2029  }
2030 
2031  for (auto a : atoms)
2032  {
2033  atomSites.update_value(key("id") == a.id(), "label_comp_id", newCompound);
2034  atomSites.update_value(key("id") == a.id(), "auth_comp_id", newCompound);
2035  }
2036 }
2037 
2038 void structure::remove_residue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
2039 {
2040  if (seq_id == 0)
2041  {
2042  for (auto &res : m_non_polymers)
2043  {
2044  if (res.get_asym_id() == asym_id and (auth_seq_id.empty() or res.get_auth_seq_id() == auth_seq_id))
2045  {
2046  remove_residue(res);
2047  return;
2048  }
2049  }
2050  }
2051 
2052  for (auto &poly : m_polymers)
2053  {
2054  if (poly.get_asym_id() != asym_id)
2055  continue;
2056 
2057  for (auto &res : poly)
2058  {
2059  if (res.get_seq_id() == seq_id)
2060  {
2061  remove_residue(res);
2062  return;
2063  }
2064  }
2065  }
2066 
2067  for (auto &branch : m_branches)
2068  {
2069  if (branch.get_asym_id() != asym_id)
2070  continue;
2071 
2072  for (auto &sugar : branch)
2073  {
2074  if (sugar.get_asym_id() == asym_id and sugar.get_auth_seq_id() == auth_seq_id)
2075  {
2076  remove_residue(sugar);
2077  return;
2078  }
2079  }
2080  }
2081 }
2082 
2083 void structure::remove_residue(residue &res)
2084 {
2085  using namespace literals;
2086 
2087  auto atoms = res.atoms();
2088 
2089  switch (res.entity_type())
2090  {
2091  case EntityType::polymer:
2092  {
2093  auto &m = dynamic_cast<monomer &>(res);
2094 
2095  m_db["pdbx_poly_seq_scheme"].erase(
2096  "asym_id"_key == res.get_asym_id() and
2097  "seq_id"_key == res.get_seq_id());
2098 
2099  for (auto &poly : m_polymers)
2100  poly.erase(std::remove(poly.begin(), poly.end(), m), poly.end());
2101  break;
2102  }
2103 
2104  case EntityType::NonPolymer:
2105  m_db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.get_asym_id());
2106  m_db["struct_asym"].erase("id"_key == res.get_asym_id());
2107  m_non_polymers.erase(std::remove(m_non_polymers.begin(), m_non_polymers.end(), res), m_non_polymers.end());
2108  break;
2109 
2110  case EntityType::Water:
2111  m_db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.get_asym_id());
2112  m_non_polymers.erase(std::remove(m_non_polymers.begin(), m_non_polymers.end(), res), m_non_polymers.end());
2113  break;
2114 
2115  case EntityType::Branched:
2116  {
2117  auto &s = dynamic_cast<sugar&>(res);
2118 
2119  remove_sugar(s);
2120 
2121  atoms.clear();
2122  break;
2123  }
2124 
2125  case EntityType::Macrolide:
2126  // TODO: Fix this?
2127  throw std::runtime_error("no support for macrolides yet");
2128  }
2129 
2130  for (auto atom : atoms)
2131  remove_atom(atom, false);
2132 }
2133 
2134 void structure::remove_sugar(sugar &s)
2135 {
2136  using namespace literals;
2137 
2138  std::string asym_id = s.get_asym_id();
2139  branch &branch = get_branch_by_asym_id(asym_id);
2140  auto si = std::find(branch.begin(), branch.end(), s);
2141  if (si == branch.end())
2142  throw std::runtime_error("sugar not part of branch");
2143  size_t six = si - branch.begin();
2144 
2145  if (six == 0) // first sugar, means the death of this branch
2146  remove_branch(branch);
2147  else
2148  {
2149  std::set<size_t> dix;
2150  std::stack<size_t> test;
2151  test.push(s.num());
2152 
2153  while (not test.empty())
2154  {
2155  auto tix = test.top();
2156  test.pop();
2157 
2158  if (dix.count(tix))
2159  continue;
2160 
2161  dix.insert(tix);
2162 
2163  for (auto &s2 : branch)
2164  {
2165  if (s2.get_link_nr() == tix)
2166  test.push(s2.num());
2167  }
2168 
2169  for (auto atom : branch[tix - 1].atoms())
2170  remove_atom(atom, false);
2171  }
2172 
2173  branch.erase(remove_if(branch.begin(), branch.end(), [dix](const sugar &s) { return dix.count(s.num()); }), branch.end());
2174 
2175  auto entity_id = create_entity_for_branch(branch);
2176 
2177  // Update the entity id of the asym
2178  auto &struct_asym = m_db["struct_asym"];
2179  auto r = struct_asym.find1("id"_key == asym_id);
2180  r["entity_id"] = entity_id;
2181 
2182  for (auto &sugar : branch)
2183  {
2184  for (auto atom : sugar.atoms())
2185  atom.set_property("label_entity_id", entity_id);
2186  }
2187 
2188  auto &pdbx_branch_scheme = m_db["pdbx_branch_scheme"];
2189  pdbx_branch_scheme.erase("asym_id"_key == asym_id);
2190 
2191  for (auto &sugar : branch)
2192  {
2193  pdbx_branch_scheme.emplace({
2194  {"asym_id", asym_id},
2195  {"entity_id", entity_id},
2196  {"num", sugar.num()},
2197  {"mon_id", sugar.get_compound_id()},
2198 
2199  {"pdb_asym_id", asym_id},
2200  {"pdb_seq_num", sugar.num()},
2201  {"pdb_mon_id", sugar.get_compound_id()},
2202 
2203  // TODO: need fix, collect from nag_atoms?
2204  {"auth_asym_id", asym_id},
2205  {"auth_mon_id", sugar.get_compound_id()},
2206  {"auth_seq_num", sugar.get_auth_seq_id()},
2207 
2208  {"hetero", "n"}
2209  });
2210  }
2211  }
2212 }
2213 
2214 void structure::remove_branch(branch &branch)
2215 {
2216  using namespace literals;
2217 
2218  for (auto &sugar : branch)
2219  {
2220  auto atoms = sugar.atoms();
2221  for (auto atom : atoms)
2222  remove_atom(atom);
2223  }
2224 
2225  m_db["pdbx_branch_scheme"].erase("asym_id"_key == branch.get_asym_id());
2226  m_db["struct_asym"].erase("id"_key == branch.get_asym_id());
2227 
2228  m_branches.erase(remove(m_branches.begin(), m_branches.end(), branch), m_branches.end());
2229 }
2230 
2231 std::string structure::create_non_poly_entity(const std::string &comp_id)
2232 {
2233  return insert_compound(comp_id, true);
2234 }
2235 
2236 std::string structure::create_non_poly(const std::string &entity_id, const std::vector<atom> &atoms)
2237 {
2238  using namespace cif::literals;
2239 
2240  auto &struct_asym = m_db["struct_asym"];
2241  std::string asym_id = struct_asym.get_unique_id();
2242 
2243  struct_asym.emplace({
2244  {"id", asym_id},
2245  {"pdbx_blank_PDB_chainid_flag", "N"},
2246  {"pdbx_modified", "N"},
2247  {"entity_id", entity_id},
2248  {"details", "?"}
2249  });
2250 
2251  std::string comp_id = m_db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");
2252 
2253  auto &atom_site = m_db["atom_site"];
2254 
2255  auto &res = m_non_polymers.emplace_back(*this, comp_id, asym_id, 0, asym_id, "1", "");
2256 
2257  for (auto &atom : atoms)
2258  {
2259  auto atom_id = atom_site.get_unique_id("");
2260 
2261  auto row = atom_site.emplace({
2262  {"group_PDB", atom.get_property("group_PDB")},
2263  {"id", atom_id},
2264  {"type_symbol", atom.get_property("type_symbol")},
2265  {"label_atom_id", atom.get_property("label_atom_id")},
2266  {"label_alt_id", atom.get_property("label_alt_id")},
2267  {"label_comp_id", comp_id},
2268  {"label_asym_id", asym_id},
2269  {"label_entity_id", entity_id},
2270  {"label_seq_id", "."},
2271  {"pdbx_PDB_ins_code", ""},
2272  {"Cartn_x", atom.get_property("Cartn_x")},
2273  {"Cartn_y", atom.get_property("Cartn_y")},
2274  {"Cartn_z", atom.get_property("Cartn_z")},
2275  {"occupancy", atom.get_property("occupancy")},
2276  {"B_iso_or_equiv", atom.get_property("B_iso_or_equiv")},
2277  {"pdbx_formal_charge", atom.get_property("pdbx_formal_charge")},
2278  {"auth_seq_id", 1},
2279  {"auth_comp_id", comp_id},
2280  {"auth_asym_id", asym_id},
2281  {"auth_atom_id", atom.get_property("label_atom_id")},
2282  {"pdbx_PDB_model_num", 1}
2283  });
2284 
2285  auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
2286  res.add_atom(newAtom);
2287  }
2288 
2289  auto &pdbx_nonpoly_scheme = m_db["pdbx_nonpoly_scheme"];
2290  size_t ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
2291  pdbx_nonpoly_scheme.emplace({
2292  {"asym_id", asym_id},
2293  {"entity_id", entity_id},
2294  {"mon_id", comp_id},
2295  {"ndb_seq_num", ndb_nr},
2296  {"pdb_seq_num", res.get_auth_seq_id()},
2297  {"auth_seq_num", res.get_auth_seq_id()},
2298  {"pdb_mon_id", comp_id},
2299  {"auth_mon_id", comp_id},
2300  {"pdb_strand_id", asym_id},
2301  {"pdb_ins_code", "."},
2302  });
2303 
2304  return asym_id;
2305 }
2306 
2307 std::string structure::create_non_poly(const std::string &entity_id, std::vector<row_initializer> atoms)
2308 {
2309  using namespace literals;
2310 
2311  auto &struct_asym = m_db["struct_asym"];
2312  std::string asym_id = struct_asym.get_unique_id();
2313 
2314  struct_asym.emplace({
2315  {"id", asym_id},
2316  {"pdbx_blank_PDB_chainid_flag", "N"},
2317  {"pdbx_modified", "N"},
2318  {"entity_id", entity_id},
2319  {"details", "?"}
2320  });
2321 
2322  std::string comp_id = m_db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");
2323 
2324  auto &atom_site = m_db["atom_site"];
2325 
2326  auto &res = m_non_polymers.emplace_back(*this, comp_id, asym_id, 0, asym_id, "1", "");
2327 
2328  for (auto &atom : atoms)
2329  {
2330  auto atom_id = atom_site.get_unique_id("");
2331 
2332  atom.set_value("name", atom_id);
2333  atom.set_value("label_asym_id", asym_id);
2334  atom.set_value("auth_asym_id", asym_id);
2335  atom.set_value("label_entity_id", entity_id);
2336 
2337  atom.set_value_if_empty({"group_PDB", "HETATM"});
2338  atom.set_value_if_empty({"label_comp_id", comp_id});
2339  atom.set_value_if_empty({"label_seq_id", ""});
2340  atom.set_value_if_empty({"auth_comp_id", comp_id});
2341  atom.set_value_if_empty({"auth_seq_id", 1});
2342  atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
2343  atom.set_value_if_empty({"label_alt_id", ""});
2344 
2345  auto row = atom_site.emplace(atom.begin(), atom.end());
2346 
2347  auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
2348  res.add_atom(newAtom);
2349  }
2350 
2351  auto &pdbx_nonpoly_scheme = m_db["pdbx_nonpoly_scheme"];
2352  size_t ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
2353  pdbx_nonpoly_scheme.emplace({
2354  {"asym_id", asym_id},
2355  {"entity_id", entity_id},
2356  {"mon_id", comp_id},
2357  {"ndb_seq_num", ndb_nr},
2358  {"pdb_seq_num", res.get_auth_seq_id()},
2359  {"auth_seq_num", res.get_auth_seq_id()},
2360  {"pdb_mon_id", comp_id},
2361  {"auth_mon_id", comp_id},
2362  {"pdb_strand_id", asym_id},
2363  {"pdb_ins_code", "."},
2364  });
2365 
2366  return asym_id;
2367 }
2368 
2369 branch &structure::create_branch()
2370 {
2371  auto &entity = m_db["entity"];
2372  auto &struct_asym = m_db["struct_asym"];
2373 
2374  auto entity_id = entity.get_unique_id("");
2375  auto asym_id = struct_asym.get_unique_id();
2376 
2377  entity.emplace({
2378  {"id", entity_id},
2379  {"type", "branched"}
2380  });
2381 
2382  struct_asym.emplace({
2383  {"id", asym_id},
2384  {"pdbx_blank_PDB_chainid_flag", "N"},
2385  {"pdbx_modified", "N"},
2386  {"entity_id", entity_id},
2387  {"details", "?"}
2388  });
2389 
2390  return m_branches.emplace_back(*this, asym_id, entity_id);
2391 }
2392 
2393 // branch &structure::create_branch(std::vector<row_initializer> atoms)
2394 // {
2395 // // // sanity check
2396 // // for (auto &nag_atom : atoms)
2397 // // {
2398 // // for (const auto &[name, value] : nag_atom)
2399 // // {
2400 // // if (name == "label_comp_id" and value != "NAG")
2401 // // throw std::logic_error("The first sugar in a branch should be a NAG");
2402 // // }
2403 // // }
2404 
2405 // // using namespace literals;
2406 
2407 // // auto &branch = create_branch();
2408 // // auto asym_id = branch.get_asym_id();
2409 // // auto entity_id = branch.get_entity_id();
2410 
2411 // // auto &sugar = branch.emplace_back(branch, "NAG", asym_id, 1);
2412 
2413 // // auto &atom_site = m_db["atom_site"];
2414 
2415 // // for (auto &atom : atoms)
2416 // // {
2417 // // auto atom_id = atom_site.get_unique_id("");
2418 
2419 // // atom.set_value("id", atom_id);
2420 // // atom.set_value("label_asym_id", asym_id);
2421 // // atom.set_value("auth_asym_id", asym_id);
2422 // // atom.set_value("label_entity_id", entity_id);
2423 // // atom.set_value({ "auth_seq_id", 1 });
2424 
2425 // // atom.set_value_if_empty({"group_PDB", "HETATM"});
2426 // // atom.set_value_if_empty({"label_comp_id", "NAG"});
2427 // // atom.set_value_if_empty({"label_seq_id", "."});
2428 // // atom.set_value_if_empty({"auth_comp_id", "NAG"});
2429 // // atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
2430 // // atom.set_value_if_empty({"label_alt_id", ""});
2431 
2432 // // auto row = atom_site.emplace(atom.begin(), atom.end());
2433 
2434 // // auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
2435 // // sugar.add_atom(newAtom);
2436 // // }
2437 
2438 // // // // now we can create the entity and get the real ID
2439 // // // auto entity_id = create_entity_for_branch(branch);
2440 // // // assert(not entity_id.empty());
2441 
2442 // // for (auto &a : sugar.atoms())
2443 // // a.set_property("label_entity_id", entity_id);
2444 
2445 // // m_db["pdbx_branch_scheme"].emplace({
2446 // // {"asym_id", asym_id},
2447 // // {"entity_id", entity_id},
2448 // // {"num", 1},
2449 // // {"mon_id", "NAG"},
2450 
2451 // // {"pdb_asym_id", asym_id},
2452 // // {"pdb_seq_num", 1},
2453 // // {"pdb_mon_id", "NAG"},
2454 
2455 // // // TODO: need fix, collect from nag_atoms?
2456 // // {"auth_asym_id", asym_id},
2457 // // {"auth_mon_id", "NAG"},
2458 // // {"auth_seq_num", 1},
2459 
2460 // // {"hetero", "n"}
2461 // // });
2462 
2463 // // return branch;
2464 // }
2465 
2466 // branch &structure::extend_branch(const std::string &asym_id, std::vector<row_initializer> atom_info,
2467 // int link_sugar, const std::string &link_atom)
2468 // {
2469 // // // sanity check
2470 // // std::string compoundID;
2471 
2472 // // for (auto &atom : atom_info)
2473 // // {
2474 // // for (const auto &[name, value] : atom)
2475 // // {
2476 // // if (name != "label_comp_id")
2477 // // continue;
2478 
2479 // // if (compoundID.empty())
2480 // // compoundID = value;
2481 // // else if (value != compoundID)
2482 // // throw std::logic_error("All atoms should be of the same type");
2483 // // }
2484 // // }
2485 
2486 // // using namespace literals;
2487 
2488 // // // auto &branch = m_branches.emplace_back(*this, asym_id);
2489 // // auto tmp_entity_id = m_db["entity"].get_unique_id("");
2490 
2491 // // auto &atom_site = m_db["atom_site"];
2492 
2493 // // auto bi = std::find_if(m_branches.begin(), m_branches.end(), [asym_id](branch &b)
2494 // // { return b.get_asym_id() == asym_id; });
2495 // // if (bi == m_branches.end())
2496 // // throw std::logic_error("Create a branch first!");
2497 
2498 // // branch &branch = *bi;
2499 
2500 // // int sugarNum = static_cast<int>(branch.size() + 1);
2501 
2502 // // auto &sugar = branch.emplace_back(branch, compoundID, asym_id, sugarNum);
2503 
2504 // // for (auto &atom : atom_info)
2505 // // {
2506 // // auto atom_id = atom_site.get_unique_id("");
2507 
2508 // // atom.set_value("id", atom_id);
2509 // // atom.set_value("label_asym_id", asym_id);
2510 // // atom.set_value("auth_asym_id", asym_id);
2511 // // atom.set_value("label_entity_id", tmp_entity_id);
2512 // // atom.set_value({"auth_seq_id", sugarNum });
2513 
2514 // // atom.set_value_if_empty({"group_PDB", "HETATM"});
2515 // // atom.set_value_if_empty({"label_comp_id", compoundID});
2516 // // atom.set_value_if_empty({"auth_comp_id", compoundID});
2517 // // atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
2518 // // atom.set_value_if_empty({"label_alt_id", ""});
2519 
2520 // // auto row = atom_site.emplace(atom.begin(), atom.end());
2521 
2522 // // auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
2523 // // sugar.add_atom(newAtom);
2524 // // }
2525 
2526 // // sugar.set_link(branch.at(link_sugar - 1).get_atom_by_atom_id(link_atom));
2527 
2528 // // auto entity_id = create_entity_for_branch(branch);
2529 
2530 // // // Update the entity id of the asym
2531 // // auto &struct_asym = m_db["struct_asym"];
2532 // // auto r = struct_asym.find1("id"_key == asym_id);
2533 // // r["entity_id"] = entity_id;
2534 
2535 // // for (auto &s2 : branch)
2536 // // {
2537 // // for (auto atom : s2.atoms())
2538 // // atom.set_property("label_entity_id", entity_id);
2539 // // }
2540 
2541 // // auto &pdbx_branch_scheme = m_db["pdbx_branch_scheme"];
2542 // // pdbx_branch_scheme.erase("asym_id"_key == asym_id);
2543 
2544 // // for (auto &s2 : branch)
2545 // // {
2546 // // pdbx_branch_scheme.emplace({
2547 // // {"asym_id", asym_id},
2548 // // {"entity_id", entity_id},
2549 // // {"num", s2.num()},
2550 // // {"mon_id", s2.get_compound_id()},
2551 
2552 // // {"pdb_asym_id", asym_id},
2553 // // {"pdb_seq_num", s2.num()},
2554 // // {"pdb_mon_id", s2.get_compound_id()},
2555 
2556 // // // TODO: need fix, collect from nag_atoms?
2557 // // {"auth_asym_id", asym_id},
2558 // // {"auth_mon_id", s2.get_compound_id()},
2559 // // {"auth_seq_num", s2.get_auth_seq_id()},
2560 
2561 // // {"hetero", "n"}
2562 // // });
2563 // // }
2564 
2565 // // return branch;
2566 // }
2567 
2568 std::string structure::create_entity_for_branch(branch &branch)
2569 {
2570  using namespace literals;
2571 
2572  std::string entityName = branch.name();
2573 
2574  auto &entity = m_db["entity"];
2575 
2576  std::string entityID = entity.find_first<std::string>("type"_key == "branched" and "pdbx_description"_key == entityName, "id");
2577 
2578  if (entityID.empty())
2579  {
2580  entityID = entity.get_unique_id("");
2581 
2582  if (VERBOSE)
2583  std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << std::endl;
2584 
2585  entity.emplace({
2586  {"id", entityID},
2587  {"type", "branched"},
2588  {"src_method", "man"},
2589  {"pdbx_description", entityName},
2590  {"formula_weight", branch.weight()}});
2591 
2592  auto &pdbx_entity_branch_list = m_db["pdbx_entity_branch_list"];
2593  for (auto &sugar : branch)
2594  {
2595  pdbx_entity_branch_list.emplace({
2596  {"entity_id", entityID},
2597  {"comp_id", sugar.get_compound_id()},
2598  {"num", sugar.num()},
2599  {"hetero", "n"}
2600  });
2601  }
2602 
2603  auto &pdbx_entity_branch_link = m_db["pdbx_entity_branch_link"];
2604  for (auto &s1 : branch)
2605  {
2606  auto l2 = s1.get_link();
2607 
2608  if (not l2)
2609  continue;
2610 
2611  auto &s2 = branch.at(std::stoi(l2.get_auth_seq_id()) - 1);
2612  auto l1 = s2.get_atom_by_atom_id("C1");
2613 
2614  pdbx_entity_branch_link.emplace({
2615  {"link_id", pdbx_entity_branch_link.get_unique_id("")},
2616  {"entity_id", entityID},
2617  {"entity_branch_list_num_1", s1.get_auth_seq_id()},
2618  {"comp_id_1", s1.get_compound_id()},
2619  {"atom_id_1", l1.get_label_atom_id()},
2620  {"leaving_atom_id_1", "O1"},
2621  {"entity_branch_list_num_2", s2.get_auth_seq_id()},
2622  {"comp_id_2", s2.get_compound_id()},
2623  {"atom_id_2", l2.get_label_atom_id()},
2624  {"leaving_atom_id_2", "H" + l2.get_label_atom_id()},
2625  {"value_order", "sing"}
2626  });
2627  }
2628  }
2629 
2630  return entityID;
2631 }
2632 
2633 void structure::cleanup_empty_categories()
2634 {
2635  using namespace literals;
2636 
2637  auto &atomSite = m_db["atom_site"];
2638 
2639  // Remove chem_comp's for which there are no atoms at all
2640  auto &chem_comp = m_db["chem_comp"];
2641  std::vector<row_handle> obsoleteChemComps;
2642 
2643  for (auto chemComp : chem_comp)
2644  {
2645  std::string compID = chemComp["id"].as<std::string>();
2646  if (atomSite.exists("label_comp_id"_key == compID or "auth_comp_id"_key == compID))
2647  continue;
2648 
2649  obsoleteChemComps.push_back(chemComp);
2650  }
2651 
2652  for (auto chemComp : obsoleteChemComps)
2653  chem_comp.erase(chemComp);
2654 
2655  // similarly, remove entities not referenced by any atom
2656 
2657  auto &entities = m_db["entity"];
2658  std::vector<row_handle> obsoleteEntities;
2659 
2660  for (auto entity : entities)
2661  {
2662  std::string entityID = entity["id"].as<std::string>();
2663  if (atomSite.exists("label_entity_id"_key == entityID))
2664  continue;
2665 
2666  obsoleteEntities.push_back(entity);
2667  }
2668 
2669  for (auto entity : obsoleteEntities)
2670  entities.erase(entity);
2671 
2672  // the rest?
2673 
2674  for (const char *cat : {"pdbx_entity_nonpoly"})
2675  {
2676  auto &category = m_db[cat];
2677 
2678  std::vector<row_handle> empty;
2679  for (auto row : category)
2680  {
2681  if (not category.has_children(row) and not category.has_parents(row))
2682  empty.push_back(row);
2683  }
2684 
2685  for (auto row : empty)
2686  category.erase(row);
2687  }
2688 
2689  // count molecules
2690  for (auto entity : entities)
2691  {
2692  std::string type, id;
2693  cif::tie(type, id) = entity.get("type", "id");
2694 
2695  std::optional<size_t> count;
2696  if (type == "polymer")
2697  count = m_db["struct_asym"].find("entity_id"_key == id).size();
2698  else if (type == "non-polymer" or type == "water")
2699  count = m_db["pdbx_nonpoly_scheme"].find("entity_id"_key == id).size();
2700  else if (type == "branched")
2701  {
2702  // is this correct?
2703  std::set<std::string> asym_ids;
2704  for (const auto &asym_id : m_db["pdbx_branch_scheme"].find<std::string>("entity_id"_key == id, "asym_id"))
2705  asym_ids.insert(asym_id);
2706  count = asym_ids.size();
2707  }
2708 
2709  entity["pdbx_number_of_molecules"] = count.value_or(0);
2710  }
2711 }
2712 
2714 {
2715  for (auto &a : m_atoms)
2716  a.translate(t);
2717 }
2718 
2719 void structure::rotate(quaternion q)
2720 {
2721  for (auto &a : m_atoms)
2722  a.rotate(q);
2723 }
2724 
2725 void structure::translate_and_rotate(point t, quaternion q)
2726 {
2727  for (auto &a : m_atoms)
2728  a.translate_and_rotate(t, q);
2729 }
2730 
2731 void structure::translate_rotate_and_translate(point t1, quaternion q, point t2)
2732 {
2733  for (auto &a : m_atoms)
2734  a.translate_rotate_and_translate(t1, q, t2);
2735 }
2736 
2737 void structure::validate_atoms() const
2738 {
2739  // validate order
2740  assert(m_atoms.size() == m_atom_index.size());
2741  for (size_t i = 0; i + i < m_atoms.size(); ++i)
2742  assert(m_atoms[m_atom_index[i]].id().compare(m_atoms[m_atom_index[i + 1]].id()) < 0);
2743 
2744  std::vector<atom> atoms = m_atoms;
2745 
2746  auto removeAtomFromList = [&atoms](const atom &a)
2747  {
2748  auto i = std::find(atoms.begin(), atoms.end(), a);
2749  assert(i != atoms.end());
2750  atoms.erase(i);
2751  };
2752 
2753  for (auto &poly : m_polymers)
2754  {
2755  for (auto &monomer : poly)
2756  {
2757  for (auto &atom : monomer.atoms())
2758  removeAtomFromList(atom);
2759  }
2760  }
2761 
2762  for (auto &branch : m_branches)
2763  {
2764  for (auto &sugar : branch)
2765  {
2766  for (auto &atom : sugar.atoms())
2767  removeAtomFromList(atom);
2768  }
2769  }
2770 
2771  for (auto &res : m_non_polymers)
2772  {
2773  for (auto &atom : res.atoms())
2774  removeAtomFromList(atom);
2775  }
2776 
2777  assert(atoms.empty());
2778 }
2779 
2780 // --------------------------------------------------------------------
2781 
2782 void reconstruct_pdbx(datablock &db)
2783 {
2784  if (db.get("atom_site") == nullptr)
2785  throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
2786 
2787 
2788 }
2789 
2790 } // namespace pdbx
point centroid(const std::vector< point > &pts)
Definition: point.cpp:397
doublereal * c
void sqrt(Image< double > &op)
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
void reconstruct_pdbx(datablock &db)
Definition: model.cpp:2782
void abs(Image< double > &op)
bool iequals(std::string_view a, std::string_view b)
Definition: text.cpp:59
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
#define rotate(a, i, j, k, l)
doublereal * d
doublereal * b
viol index
viol type
const std::map< std::string, std::vector< std::string > > kChiAtomsMap
Definition: model.cpp:756
double * f
void max(Image< double > &op1, const Image< double > &op2)
int VERBOSE
Definition: utilities.cpp:58
#define j
int m
TYPE distance(struct Point_T *p, struct Point_T *q)
Definition: point.cpp:28
void translate(PDB *pdb_original, PDB *pdb_move, unsigned num_atoms, double x0, double y0, double z0)
Definition: lib_pwk.cpp:125
double psi(const double x)
std::ostream & operator<<(std::ostream &os, const atom &atom)
Definition: model.cpp:291
float r2
std::string to_string(bond_type bondType)
Definition: compound.cpp:43
int * n
doublereal * a
float r1