Xmipp  v3.23.11-Nereus
cif2pdb.cpp
Go to the documentation of this file.
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "cif++.hpp"
28 #include "cif++/pdb/cif2pdb.hpp"
29 #include "cif++/gzio.hpp"
30 
31 #include <cmath>
32 #include <deque>
33 #include <iomanip>
34 #include <map>
35 #include <regex>
36 #include <set>
37 
38 
39 namespace cif::pdb
40 {
41 
42 using namespace std::literals;
43 
44 // --------------------------------------------------------------------
45 // conversion routines between cif and pdb format
46 
// Convert a date written as yyyy-mm or yyyy-mm-dd into the notation
// dd-MON-yy (or MON-yy when the day is absent), where MON is the upper case
// three letter English month abbreviation and yy the last two digits of the
// year.
//
// d       the date to convert
// returns the converted date, or an empty string when d does not have the
//         expected shape or when its month is not in the range 01..12
std::string cif2pdbDate(const std::string &d)
{
	static const std::regex rx(R"((\d{4})-(\d{2})(?:-(\d{2}))?)");
	static const char *const kMonths[12] = {
		"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
	};

	std::string result;

	std::smatch m;
	if (std::regex_match(d, m, rx))
	{
		const int month = std::stoi(m[2].str());

		// The pattern accepts any two digits for the month, so 00 and 13..99
		// have to be rejected before kMonths is indexed.
		if (month >= 1 and month <= 12)
		{
			// The year group is exactly four digits, so its last two digits
			// are the zero padded value of year % 100.
			result = std::string(kMonths[month - 1]) + '-' + m[1].str().substr(2);

			// The day group, when present, is exactly two digits already.
			if (m[3].matched)
				result = m[3].str() + '-' + result;
		}
	}

	return result;
}
70 
// Rewrite an author name of the form "<surname>, <initials>" as
// "<initials><surname>" (no separator in between). A name that does not
// consist of exactly one comma separated part followed by ", " and a single
// run of non-blank characters is returned unchanged.
std::string cif2pdbAuth(std::string name)
{
	const std::regex kSurnameThenInitials(R"(([^,]+), (\S+))");

	std::smatch parts;
	if (not std::regex_match(name, parts, kSurnameThenInitials))
		return name;

	return parts[2].str() + parts[1].str();
}
81 
// Remove the last underscore character from s, if there is one, and return
// the result. Any earlier underscores are left in place.
std::string cif2pdbSymmetry(std::string s)
{
	const std::string::size_type underscore = s.find_last_of('_');

	if (underscore == std::string::npos)
		return s;

	s.erase(s.begin() + underscore);
	return s;
}
89 
90 std::string cif2pdbAtomName(std::string name, std::string resName, const datablock &db)
91 {
92  if (name.length() < 4)
93  {
94  for (auto r : db["atom_site"].find(key("label_atom_id") == name and key("label_comp_id") == resName))
95  {
96  std::string element = r["type_symbol"].as<std::string>();
97 
98  if (element.length() == 1 or not iequals(name, element))
99  name.insert(name.begin(), ' ');
100 
101  break;
102  }
103  }
104 
105  return name;
106 }
107 
109 {
115 };
116 
117 std::string cifSoftware(const datablock &db, SoftwareType sw)
118 {
119  std::string result = "NULL";
120 
121  try
122  {
123  switch (sw)
124  {
125  case eRefinement: result = db["computing"].find_first<std::string>(key("entry_id") == db.name(), "structure_refinement"); break;
126  case eDataScaling: result = db["computing"].find_first<std::string>(key("entry_id") == db.name(), "pdbx_data_reduction_ds"); break;
127  case eDataReduction: result = db["computing"].find_first<std::string>(key("entry_id") == db.name(), "pdbx_data_reduction_ii"); break;
128  default: break;
129  }
130 
131  if (result.empty() or result == "NULL")
132  {
133  auto &software = db["software"];
134 
135  row_handle r;
136 
137  switch (sw)
138  {
139  case eRefinement: r = software.find_first(key("classification") == "refinement"); break;
140  case eDataScaling: r = software.find_first(key("classification") == "data scaling"); break;
141  case eDataExtraction: r = software.find_first(key("classification") == "data extraction"); break;
142  case eDataReduction: r = software.find_first(key("classification") == "data reduction"); break;
143  case ePhasing: r = software.find_first(key("classification") == "phasing"); break;
144  }
145 
146  if (not r.empty())
147  result = r["name"].as<std::string>() + " " + r["version"].as<std::string>();
148  }
149 
150  trim(result);
151  to_upper(result);
152 
153  if (result.empty())
154  result = "NULL";
155  }
156  catch (...)
157  {
158  }
159 
160  return result;
161 }
162 
163 // Map asym ID's back to PDB Chain ID's
164 std::vector<std::string> MapAsymIDs2ChainIDs(const std::vector<std::string> &asymIDs, const datablock &db)
165 {
166  std::set<std::string> result;
167 
168  for (auto asym : asymIDs)
169  {
170  for (auto r : db["pdbx_poly_seq_scheme"].find(key("asym_id") == asym))
171  {
172  result.insert(r["pdb_strand_id"].as<std::string>());
173  break;
174  }
175 
176  for (auto r : db["pdbx_nonpoly_scheme"].find(key("asym_id") == asym))
177  {
178  result.insert(r["pdb_strand_id"].as<std::string>());
179  break;
180  }
181  }
182 
183  return { result.begin(), result.end() };
184 }
185 
186 // support for wrapping text using a 'continuation marker'
187 size_t WriteContinuedLine(std::ostream &pdbFile, std::string header, int &count, int cLen, std::string text, std::string::size_type lStart = 0)
188 {
189  if (lStart == 0)
190  {
191  if (cLen == 0)
192  lStart = header.length() + 1;
193  else
194  lStart = header.length() + cLen;
195  }
196 
197  std::string::size_type maxLength = 80 - lStart - 1;
198 
199  std::vector<std::string> lines = word_wrap(text, maxLength);
200 
201  for (auto &line : lines)
202  {
203  // to_upper(line);
204 
205  pdbFile << header;
206 
207  if (++count <= 1 or cLen == 0)
208  {
209  pdbFile << std::string(lStart - header.length(), ' ');
210  if (count == 1)
211  lStart = header.length() + cLen + 1;
212  }
213  else
214  pdbFile << std::fixed << std::setw(cLen) << std::right << count << ' ';
215 
216  pdbFile << line << std::endl;
217  }
218 
219  return lines.size();
220 }
221 
222 size_t WriteOneContinuedLine(std::ostream &pdbFile, std::string header, int cLen, std::string line, int lStart = 0)
223 {
224  int count = 0;
225  return WriteContinuedLine(pdbFile, header, count, cLen, line, lStart);
226 }
227 
228 size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle r, int reference)
229 {
230  size_t result = 0;
231 
232  std::string s1;
233 
234  if (reference > 0)
235  {
236  pdbFile << "REMARK 1 REFERENCE " << std::to_string(reference) << std::endl;
237  result = 1;
238  s1 = "REMARK 1 ";
239  }
240  else
241  s1 = "JRNL ";
242 
243  std::string id, title, pubname, volume, astm, country, issn, csd, publ, pmid, doi, pageFirst, pageLast, year;
244 
245  cif::tie(id, title, pubname, volume, astm, country, issn, csd, publ, pmid, doi, pageFirst, pageLast, year) =
246  r.get("id", "title", "journal_abbrev", "journal_volume", "journal_id_ASTM", "country", "journal_id_ISSN",
247  "journal_id_CSD", "book_publisher", "pdbx_database_id_PubMed", "pdbx_database_id_DOI",
248  "page_first", "page_last", "year");
249 
250  std::vector<std::string> authors;
251  for (auto r1 : db["citation_author"].find(key("citation_id") == id))
252  authors.push_back(cif2pdbAuth(r1["name"].as<std::string>()));
253 
254  if (not authors.empty())
255  result += WriteOneContinuedLine(pdbFile, s1 + "AUTH", 2, join(authors, ","), 19);
256 
257  result += WriteOneContinuedLine(pdbFile, s1 + "TITL", 2, title, 19);
258 
259  if (not pubname.empty())
260  {
261  to_upper(pubname);
262 
263  const std::string kRefHeader = s1 + "REF %2.2s %-28.28s %2.2s%4.4s %5.5s %4.4s";
264  pdbFile << cif::format(kRefHeader, "" /* continuation */, pubname, (volume.empty() ? "" : "V."), volume, pageFirst, year)
265  << std::endl;
266  ++result;
267  }
268 
269  if (not issn.empty())
270  {
271  const std::string kRefHeader = s1 + "REFN ISSN %-25.25s";
272  pdbFile << cif::format(kRefHeader, issn) << std::endl;
273  ++result;
274  }
275 
276  // if (not issn.empty() or astm.empty())
277  // {
280  // const char kRefHeader[] =
281  // "REMARK 1 REFN %4.4s %-6.6s %2.2s %-25.25s";
282  //
283  // pdbFile << (boost::cif::format(kRefHeader)
284  // % (astm.empty() ? "" : "ASTN")
285  // % astm
286  // % country
287  // % issn).str()
288  // << std::endl;
289  // }
290 
291  if (not pmid.empty())
292  {
293  const std::string kPMID = s1 + "PMID %-60.60s ";
294  pdbFile << cif::format(kPMID, pmid) << std::endl;
295  ++result;
296  }
297 
298  if (not doi.empty())
299  {
300  const std::string kDOI = s1 + "DOI %-60.60s ";
301  pdbFile << cif::format(kDOI, doi) << std::endl;
302  ++result;
303  }
304 
305  return result;
306 }
307 
308 void write_header_lines(std::ostream &pdbFile, const datablock &db)
309 {
310  // 0 1 2 3 4 5 6 7 8
311  // HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
312  const char kHeader[] =
313  "HEADER %-40.40s"
314  "%-9.9s"
315  " %-4.4s";
316 
317  // HEADER
318 
319  std::string keywords;
320  auto &cat1 = db["struct_keywords"];
321 
322  for (auto r : cat1)
323  {
324  keywords = r["pdbx_keywords"].as<std::string>();
325  break;
326  }
327 
328  std::string date;
329  for (auto r : db["pdbx_database_status"])
330  {
331  date = r["recvd_initial_deposition_date"].as<std::string>();
332  if (date.empty())
333  continue;
334  date = cif2pdbDate(date);
335  break;
336  }
337 
338  if (date.empty())
339  {
340  for (auto r : db["database_PDB_rev"])
341  {
342  date = r["date_original"].as<std::string>();
343  if (date.empty())
344  continue;
345  date = cif2pdbDate(date);
346  break;
347  }
348  }
349 
350  pdbFile << cif::format(kHeader, keywords, date, db.name()) << std::endl;
351 
352  // TODO: implement
353  // OBSLTE (skip for now)
354 
355  // TITLE
356  for (auto r : db["struct"])
357  {
358  std::string title = r["title"].as<std::string>();
359  trim(title);
360  WriteOneContinuedLine(pdbFile, "TITLE ", 2, title);
361  break;
362  }
363 
364  // COMPND
365  using namespace std::placeholders;
366 
367  int molID = 0;
368  std::vector<std::string> cmpnd;
369 
370  for (auto r : db["entity"])
371  {
372  if (r["type"] != "polymer")
373  continue;
374 
375  std::string entityID = r["id"].as<std::string>();
376 
377  ++molID;
378  cmpnd.push_back("MOL_ID: " + std::to_string(molID));
379 
380  std::string molecule = r["pdbx_description"].as<std::string>();
381  cmpnd.push_back("MOLECULE: " + molecule);
382 
383  auto poly = db["entity_poly"].find(key("entity_id") == entityID);
384  if (not poly.empty())
385  {
386  std::string chains = poly.front()["pdbx_strand_id"].as<std::string>();
387  replace_all(chains, ",", ", ");
388  cmpnd.push_back("CHAIN: " + chains);
389  }
390 
391  std::string fragment = r["pdbx_fragment"].as<std::string>();
392  if (not fragment.empty())
393  cmpnd.push_back("FRAGMENT: " + fragment);
394 
395  for (auto sr : db["entity_name_com"].find(key("entity_id") == entityID))
396  {
397  std::string syn = sr["name"].as<std::string>();
398  if (not syn.empty())
399  cmpnd.push_back("SYNONYM: " + syn);
400  }
401 
402  std::string mutation = r["pdbx_mutation"].as<std::string>();
403  if (not mutation.empty())
404  cmpnd.push_back("MUTATION: " + mutation);
405 
406  std::string ec = r["pdbx_ec"].as<std::string>();
407  if (not ec.empty())
408  cmpnd.push_back("EC: " + ec);
409 
410  if (r["src_method"] == "man" or r["src_method"] == "syn")
411  cmpnd.push_back("ENGINEERED: YES");
412 
413  std::string details = r["details"].as<std::string>();
414  if (not details.empty())
415  cmpnd.push_back("OTHER_DETAILS: " + details);
416  }
417 
418  WriteOneContinuedLine(pdbFile, "COMPND ", 3, join(cmpnd, ";\n"));
419 
420  // SOURCE
421 
422  molID = 0;
423  std::vector<std::string> source;
424 
425  for (auto r : db["entity"])
426  {
427  if (r["type"] != "polymer")
428  continue;
429 
430  std::string entityID = r["id"].as<std::string>();
431 
432  ++molID;
433  source.push_back("MOL_ID: " + std::to_string(molID));
434 
435  if (r["src_method"] == "syn")
436  source.push_back("SYNTHETIC: YES");
437 
438  auto &gen = db["entity_src_gen"];
439  const std::pair<const char *, const char *> kGenSourceMapping[] = {
440  { "gene_src_common_name", "ORGANISM_COMMON" },
441  { "pdbx_gene_src_gene", "GENE" },
442  { "gene_src_strain", "STRAIN" },
443  { "pdbx_gene_src_cell_line", "CELL_LINE" },
444  { "pdbx_gene_src_organelle", "ORGANELLE" },
445  { "pdbx_gene_src_cellular_location", "CELLULAR_LOCATION" },
446  { "pdbx_gene_src_scientific_name", "ORGANISM_SCIENTIFIC" },
447  { "pdbx_gene_src_ncbi_taxonomy_id", "ORGANISM_TAXID" },
448  { "pdbx_host_org_scientific_name", "EXPRESSION_SYSTEM" },
449  { "pdbx_host_org_ncbi_taxonomy_id", "EXPRESSION_SYSTEM_TAXID" },
450  { "pdbx_host_org_strain", "EXPRESSION_SYSTEM_STRAIN" },
451  { "pdbx_host_org_variant", "EXPRESSION_SYSTEM_VARIANT" },
452  { "pdbx_host_org_cellular_location", "EXPRESSION_SYSTEM_CELLULAR_LOCATION" },
453  { "pdbx_host_org_vector_type", "EXPRESSION_SYSTEM_VECTOR_TYPE" },
454  { "pdbx_host_org_vector", "EXPRESSION_SYSTEM_VECTOR" },
455  { "pdbx_host_org_gene", "EXPRESSION_SYSTEM_GENE" },
456  { "plasmid_name", "EXPRESSION_SYSTEM_PLASMID" },
457  { "details", "OTHER_DETAILS" }
458  };
459 
460  for (auto gr : gen.find(key("entity_id") == entityID))
461  {
462  for (const auto &[cname, sname] : kGenSourceMapping)
463  {
464  std::string s = gr[cname].as<std::string>();
465  if (not s.empty())
466  source.push_back(sname + ": "s + s);
467  }
468  }
469 
470  auto &nat = db["entity_src_nat"];
471  const std::pair<const char *, const char *> kNatSourceMapping[] = {
472  { "common_name", "ORGANISM_COMMON" },
473  { "strain", "STRAIN" },
474  { "pdbx_organism_scientific", "ORGANISM_SCIENTIFIC" },
475  { "pdbx_ncbi_taxonomy_id", "ORGANISM_TAXID" },
476  { "pdbx_cellular_location", "CELLULAR_LOCATION" },
477  { "pdbx_plasmid_name", "PLASMID" },
478  { "pdbx_organ", "ORGAN" },
479  { "details", "OTHER_DETAILS" }
480  };
481 
482  for (auto nr : nat.find(key("entity_id") == entityID))
483  {
484  for (const auto &[cname, sname] : kNatSourceMapping)
485  {
486  std::string s = nr[cname].as<std::string>();
487  if (not s.empty())
488  source.push_back(sname + ": "s + s);
489  }
490  }
491  }
492 
493  WriteOneContinuedLine(pdbFile, "SOURCE ", 3, join(source, ";\n"));
494 
495  // KEYWDS
496 
497  keywords.clear();
498  for (auto r : cat1)
499  {
500  if (not r["text"].empty())
501  keywords += r["text"].as<std::string>();
502  else
503  keywords += r["pdbx_keywords"].as<std::string>();
504  }
505 
506  if (not keywords.empty())
507  WriteOneContinuedLine(pdbFile, "KEYWDS ", 2, keywords);
508 
509  // EXPDTA
510 
511  auto &dbexpt = db["exptl"];
512  if (not dbexpt.empty())
513  {
514  std::vector<std::string> method;
515  for (auto r : dbexpt)
516  method.push_back(r["method"].as<std::string>());
517  if (not method.empty())
518  WriteOneContinuedLine(pdbFile, "EXPDTA ", 2, join(method, "; "));
519  }
520 
521  // NUMMDL
522  // TODO...
523 
524  // MDLTYP
525  // TODO...
526 
527  // AUTHOR
528  std::vector<std::string> authors;
529  for (auto r : db["audit_author"])
530  authors.push_back(cif2pdbAuth(r["name"].as<std::string>()));
531  if (not authors.empty())
532  WriteOneContinuedLine(pdbFile, "AUTHOR ", 2, join(authors, ","));
533 }
534 
535 void WriteTitle(std::ostream &pdbFile, const datablock &db)
536 {
537  write_header_lines(pdbFile, db);
538 
539  // REVDAT
540  const char kRevDatFmt[] = "REVDAT %3d%2.2s %9.9s %4.4s %1d ";
541  auto &cat2 = db["database_PDB_rev"];
542  std::vector<row_handle> rev(cat2.begin(), cat2.end());
543  sort(rev.begin(), rev.end(), [](row_handle a, row_handle b) -> bool
544  { return a["num"].as<int>() > b["num"].as<int>(); });
545  for (auto r : rev)
546  {
547  int revNum, modType;
548  std::string date, replaces;
549 
550  cif::tie(revNum, modType, date, replaces) = r.get("num", "mod_type", "date", "replaces");
551 
552  date = cif2pdbDate(date);
553 
554  std::vector<std::string> types;
555 
556  for (auto r1 : db["database_PDB_rev_record"].find(key("rev_num") == revNum))
557  types.push_back(r1["type"].as<std::string>());
558 
559  int continuation = 0;
560  do
561  {
562  std::string cs = ++continuation > 1 ? std::to_string(continuation) : std::string();
563 
564  pdbFile << cif::format(kRevDatFmt, revNum, cs, date, db.name(), modType);
565  for (size_t i = 0; i < 4; ++i)
566  pdbFile << cif::format(" %-6.6s", (i < types.size() ? types[i] : std::string()));
567  pdbFile << std::endl;
568 
569  if (types.size() > 4)
570  types.erase(types.begin(), types.begin() + 4);
571  else
572  types.clear();
573  } while (types.empty() == false);
574  }
575 
576  // SPRSDE
577  // TODO...
578 
579  // JRNL
580  for (auto r : db["citation"])
581  {
582  WriteCitation(pdbFile, db, r, 0);
583  break;
584  }
585 }
586 
587 void WriteRemark1(std::ostream &pdbFile, const datablock &db)
588 {
589  int reference = 0;
590 
591  for (auto r : db["citation"])
592  {
593  if (reference > 0)
594  {
595  if (reference == 1)
596  pdbFile << "REMARK 1" << std::endl;
597 
598  WriteCitation(pdbFile, db, r, reference);
599  }
600 
601  ++reference;
602  }
603 }
604 
605 void WriteRemark2(std::ostream &pdbFile, const datablock &db)
606 {
607  auto &refine = db["refine"];
608  if (refine.empty())
609  {
610  pdbFile << "REMARK 2" << std::endl
611  << "REMARK 2 RESOLUTION. NOT APPLICABLE." << std::endl;
612  }
613  else
614  {
615  try
616  {
617  float resHigh = refine.front()["ls_d_res_high"].as<float>();
618  pdbFile << "REMARK 2" << std::endl
619  << cif::format("REMARK 2 RESOLUTION. %7.2f ANGSTROMS.", resHigh) << std::endl;
620  }
621  catch (...)
622  { /* skip it */
623  }
624  }
625 }
626 
627 // --------------------------------------------------------------------
628 // Code to help format REMARK 3 data
629 
630 class FBase
631 {
632  public:
633  virtual ~FBase() {}
634 
635  virtual void out(std::ostream &os) = 0;
636 
637  protected:
638  FBase(row_handle r, const char *f)
639  : mRow(r)
640  , mField(f)
641  {
642  }
643  FBase(const category &cat, condition &&cond, const char *f)
644  : mField(f)
645  {
646  auto r = cat.find(std::move(cond));
647  if (not r.empty())
648  mRow = r.front();
649  }
650 
651  std::string_view text() const
652  {
653  return mRow.empty() or mRow[mField].empty() ? "" : mRow[mField].text();
654  }
655 
656  row_handle mRow;
657  const char *mField;
658 };
659 
660 class Fi : public FBase
661 {
662  public:
663  Fi(row_handle r, const char *f)
664  : FBase(r, f)
665  {
666  }
667  Fi(const category &cat, condition &&cond, const char *f)
668  : FBase(cat, std::move(cond), f)
669  {
670  }
671 
672  virtual void out(std::ostream &os)
673  {
674  std::string s{ text() };
675 
676  if (s.empty())
677  {
678  os << "NULL";
679  if (os.width() > 4)
680  os << std::string(os.width() - 4, ' ');
681  }
682  else
683  {
684  long l = 0;
685  auto r = std::from_chars(s.data(), s.data() + s.length(), l);
686  if (r.ec != std::errc())
687  {
688  if (VERBOSE > 0)
689  std::cerr << "Failed to write '" << s << "' as a long from field " << mField << ", this indicates an error in the code for writing PDB files" << std::endl;
690  os << s;
691  }
692  else
693  os << l;
694  }
695  }
696 };
697 
698 class Ff : public FBase
699 {
700  public:
701  Ff(row_handle r, const char *f)
702  : FBase(r, f)
703  {
704  }
705  Ff(const category &cat, condition &&cond, const char *f)
706  : FBase(cat, std::move(cond), f)
707  {
708  }
709 
710  virtual void out(std::ostream &os)
711  {
712  if (mRow.empty() or mRow[mField].empty())
713  {
714  os << "NULL";
715  if (os.width() > 4)
716  os << std::string(os.width() - 4, ' ');
717  }
718  else
719  {
720  std::string s{ text() };
721 
722  double d = 0;
723  auto r = cif::from_chars(s.data(), s.data() + s.length(), d);
724  if (r.ec != std::errc())
725  {
726  if (VERBOSE > 0)
727  std::cerr << "Failed to write '" << s << "' as a double from field " << mField << ", this indicates an error in the code for writing PDB files" << std::endl;
728  os << s;
729  }
730  else
731  os << d;
732  }
733  }
734 };
735 
736 class Fs : public FBase
737 {
738  public:
739  Fs(row_handle r, const char *f, int remarkNr = 3)
740  : FBase(r, f)
741  , mNr(remarkNr)
742  {
743  }
744  Fs(const category &cat, condition &&cond, const char *f, int remarkNr = 3)
745  : FBase(cat, std::move(cond), f)
746  , mNr(remarkNr)
747  {
748  }
749 
750  virtual void out(std::ostream &os)
751  {
752  std::string s{ text() };
753  size_t width = os.width();
754 
755  if (s.empty())
756  {
757  os << "NULL";
758  if (os.width() > 4)
759  os << std::string(width - 4, ' ');
760  }
761  else if (width == 0 or s.length() <= width)
762  os << s;
763  else
764  {
765  os << std::endl;
766 
767  std::stringstream ss;
768  ss << "REMARK " << std::setw(3) << std::right << mNr << ' ';
769  WriteOneContinuedLine(os, ss.str(), 0, s);
770  }
771  }
772 
773  int mNr = 3;
774 };
775 
776 std::ostream &operator<<(std::ostream &os, FBase &&fld)
777 {
778  fld.out(os);
779  return os;
780 }
781 
// Manipulator that starts a line for remark number N: it writes the REMARK
// header, the number and a description, and then prepares the stream for the
// value that follows.
//
// desc       text written after the header
// width      field width for the next value; a positive width selects left
//            adjustment, zero or a negative width right adjustment with the
//            absolute value as width
// precision  floating point precision for the next value
template <int N>
struct RM
{
	RM(const char *desc, int width = 0, int precision = 6)
		: mDesc(desc)
		, mWidth(width)
		, mPrecision(precision)
	{
	}

	const char *mDesc;
	int mWidth, mPrecision;
};

using RM3 = RM<3>;

template <int N>
std::ostream &operator<<(std::ostream &os, RM<N> &&rm)
{
	// header, remark number right adjusted in three positions, description
	os << "REMARK " << std::setw(3) << std::right << N << " " << rm.mDesc;

	// formatting state for whatever is written next
	if (rm.mWidth > 0)
		os << std::left;
	else
		os << std::right;

	os << std::fixed << std::setw(std::abs(rm.mWidth)) << std::setprecision(rm.mPrecision);

	return os;
}
803 
// Manipulator that writes a separator text and then prepares the stream for
// the value that follows.
//
// txt        the separator
// width      field width for the next value; a positive width selects left
//            adjustment, zero or a negative width right adjustment with the
//            absolute value as width
// precision  floating point precision for the next value
struct SEP
{
	SEP(const char *txt, int width, int precision = 6)
		: mText(txt)
		, mWidth(width)
		, mPrecision(precision)
	{
	}

	const char *mText;
	int mWidth, mPrecision;
};

std::ostream &operator<<(std::ostream &os, SEP &&sep)
{
	os << sep.mText;

	// formatting state for whatever is written next
	if (sep.mWidth > 0)
		os << std::left;
	else
		os << std::right;

	os << std::fixed << std::setw(std::abs(sep.mWidth)) << std::setprecision(sep.mPrecision);

	return os;
}
821 
822 // --------------------------------------------------------------------
823 
824 void WriteRemark3BusterTNT(std::ostream &pdbFile, const datablock &db)
825 {
826  auto refine = db["refine"].front();
827  auto ls_shell = db["refine_ls_shell"].front();
828  auto hist = db["refine_hist"].front();
829  auto reflns = db["reflns"].front();
830  auto analyze = db["refine_analyze"].front();
831  auto &ls_restr = db["refine_ls_restr"];
832  // auto ls_restr_ncs = db["refine_ls_restr_ncs"].front();
833  // auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
834  // auto pdbx_refine = db["pdbx_refine"].front();
835 
836  pdbFile << RM3("") << std::endl
837  << RM3(" DATA USED IN REFINEMENT.") << std::endl
838  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
839  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
840  << RM3(" DATA CUTOFF (SIGMA(F)) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
841  << RM3(" COMPLETENESS FOR RANGE (%) : ", 6, 1) << Ff(refine, "ls_percent_reflns_obs") << std::endl
842  << RM3(" NUMBER OF REFLECTIONS : ", 12, 6) << Fi(refine, "ls_number_reflns_obs") << std::endl
843 
844  << RM3("") << std::endl
845  << RM3(" FIT TO DATA USED IN REFINEMENT.") << std::endl
846  << RM3(" CROSS-VALIDATION METHOD : ") << Fs(refine, "pdbx_ls_cross_valid_method") << std::endl
847  << RM3(" FREE R VALUE TEST SET SELECTION : ") << Fs(refine, "pdbx_R_Free_selection_details") << std::endl
848  << RM3(" R VALUE (WORKING + TEST SET) : ", 7, 3) << Ff(refine, "ls_R_factor_obs") << std::endl
849  << RM3(" R VALUE (WORKING SET) : ", 7, 3) << Ff(refine, "ls_R_factor_R_work") << std::endl
850  << RM3(" FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free") << std::endl
851  << RM3(" FREE R VALUE TEST SET SIZE (%) : ", 7, 3) << Ff(refine, "ls_percent_reflns_R_free") << std::endl
852  << RM3(" FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(refine, "ls_number_reflns_R_free") << std::endl
853  << RM3(" ESTIMATED ERROR OF FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free_error") << std::endl
854 
855  << RM3("") << std::endl
856  << RM3(" FIT IN THE HIGHEST RESOLUTION BIN.") << std::endl
857  << RM3(" TOTAL NUMBER OF BINS USED : ", 12, 6) << Fi(ls_shell, "pdbx_total_number_of_bins_used") << std::endl
858  << RM3(" BIN RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(ls_shell, "d_res_high") << std::endl
859  << RM3(" BIN RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(ls_shell, "d_res_low") << std::endl
860  << RM3(" BIN COMPLETENESS (WORKING+TEST) (%) : ", 6, 2) << Ff(ls_shell, "percent_reflns_obs") << std::endl
861  << RM3(" REFLECTIONS IN BIN (WORKING + TEST SET) : ", 12, 6) << Fi(ls_shell, "number_reflns_all") << std::endl
862  << RM3(" BIN R VALUE (WORKING + TEST SET) : ", 8, 4) << Ff(ls_shell, "R_factor_all") << std::endl
863  << RM3(" REFLECTIONS IN BIN (WORKING SET) : ", 12, 6) << Fi(ls_shell, "number_reflns_R_work") << std::endl
864  << RM3(" BIN R VALUE (WORKING SET) : ", 8, 4) << Ff(ls_shell, "R_factor_R_work") << std::endl
865  << RM3(" BIN FREE R VALUE : ", 8, 4) << Ff(ls_shell, "R_factor_R_free") << std::endl
866  << RM3(" BIN FREE R VALUE TEST SET SIZE (%) : ", 6, 2) << Ff(ls_shell, "percent_reflns_R_free") << std::endl
867  << RM3(" BIN FREE R VALUE TEST SET COUNT : ", 12, 7) << Fi(ls_shell, "number_reflns_R_free") << std::endl
868  << RM3(" ESTIMATED ERROR OF BIN FREE R VALUE : ", 7, 3) << Ff(ls_shell, "R_factor_R_free_error") << std::endl
869 
870  << RM3("") << std::endl
871  << RM3(" NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.") << std::endl
872  << RM3(" PROTEIN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_protein") << std::endl
873  << RM3(" NUCLEIC ACID ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_nucleic_acid") << std::endl
874  << RM3(" HETEROGEN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_ligand") << std::endl
875  << RM3(" SOLVENT ATOMS : ", 12, 6) << Fi(hist, "number_atoms_solvent") << std::endl
876 
877  << RM3("") << std::endl
878  << RM3(" B VALUES.") << std::endl
879  // << RM3(" B VALUE TYPE : ") << Fs(refine, "pdbx_TLS_residual_ADP_flag") << std::endl
880  << RM3(" FROM WILSON PLOT (A**2) : ", 7, 2) << Ff(reflns, "B_iso_Wilson_estimate") << std::endl
881  << RM3(" MEAN B VALUE (OVERALL, A**2) : ", 7, 2) << Ff(refine, "B_iso_mean") << std::endl
882 
883  << RM3(" OVERALL ANISOTROPIC B VALUE.") << std::endl
884  << RM3(" B11 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[1][1]") << std::endl
885  << RM3(" B22 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[2][2]") << std::endl
886  << RM3(" B33 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[3][3]") << std::endl
887  << RM3(" B12 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[1][2]") << std::endl
888  << RM3(" B13 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[1][3]") << std::endl
889  << RM3(" B23 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[2][3]") << std::endl
890 
891  << RM3("") << std::endl
892  << RM3(" ESTIMATED COORDINATE ERROR.") << std::endl
893  << RM3(" ESD FROM LUZZATI PLOT (A) : ", 7, 3) << Ff(analyze, "Luzzati_coordinate_error_obs") << std::endl
894  << RM3(" DPI (BLOW EQ-10) BASED ON R VALUE (A) : ", 5, 3) << Ff(refine, "pdbx_overall_SU_R_Blow_DPI") << std::endl
895  << RM3(" DPI (BLOW EQ-9) BASED ON FREE R VALUE (A) : ", 5, 3) << Ff(refine, "pdbx_overall_SU_R_free_Blow_DPI") << std::endl
896  << RM3(" DPI (CRUICKSHANK) BASED ON R VALUE (A) : ", 5, 3) << Ff(refine, "overall_SU_R_Cruickshank_DPI") << std::endl
897  << RM3(" DPI (CRUICKSHANK) BASED ON FREE R VALUE (A) : ", 5, 3) << Ff(refine, "pdbx_overall_SU_R_free_Cruickshank_DPI") << std::endl
898 
899  << RM3("") << std::endl
900  << RM3(" REFERENCES: BLOW, D. (2002) ACTA CRYST D58, 792-797") << std::endl
901  << RM3(" CRUICKSHANK, D.W.J. (1999) ACTA CRYST D55, 583-601") << std::endl
902 
903  << RM3("") << std::endl
904  << RM3(" CORRELATION COEFFICIENTS.") << std::endl
905  << RM3(" CORRELATION COEFFICIENT FO-FC : ", 5, 3) << Ff(refine, "correlation_coeff_Fo_to_Fc") << std::endl
906  << RM3(" CORRELATION COEFFICIENT FO-FC FREE : ", 5, 3) << Ff(refine, "correlation_coeff_Fo_to_Fc_free") << std::endl
907 
908  << RM3("") << std::endl
909  << RM3(" NUMBER OF GEOMETRIC FUNCTION TERMS DEFINED : 15") << std::endl
910  << RM3(" TERM COUNT WEIGHT FUNCTION.") << std::endl
911  << RM3(" BOND LENGTHS : ", 7, 0) << Ff(ls_restr, key("type") == "t_bond_d", "number")
912  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_bond_d", "weight")
913  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_bond_d", "pdbx_restraint_function") << std::endl
914  << RM3(" BOND ANGLES : ", 7, 0) << Ff(ls_restr, key("type") == "t_angle_deg", "number")
915  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_angle_deg", "weight")
916  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_angle_deg", "pdbx_restraint_function") << std::endl
917  << RM3(" TORSION ANGLES : ", 7, 0) << Ff(ls_restr, key("type") == "t_dihedral_angle_d", "number")
918  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_dihedral_angle_d", "weight")
919  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_dihedral_angle_d", "pdbx_restraint_function") << std::endl
920  << RM3(" TRIGONAL CARBON PLANES : ", 7, 0) << Ff(ls_restr, key("type") == "t_trig_c_planes", "number")
921  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_trig_c_planes", "weight")
922  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_trig_c_planes", "pdbx_restraint_function") << std::endl
923  << RM3(" GENERAL PLANES : ", 7, 0) << Ff(ls_restr, key("type") == "t_gen_planes", "number")
924  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_gen_planes", "weight")
925  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_gen_planes", "pdbx_restraint_function") << std::endl
926  << RM3(" ISOTROPIC THERMAL FACTORS : ", 7, 0) << Ff(ls_restr, key("type") == "t_it", "number")
927  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_it", "weight")
928  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_it", "pdbx_restraint_function") << std::endl
929  << RM3(" BAD NON-BONDED CONTACTS : ", 7, 0) << Ff(ls_restr, key("type") == "t_nbd", "number")
930  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_nbd", "weight")
931  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_nbd", "pdbx_restraint_function") << std::endl
932  << RM3(" IMPROPER TORSIONS : ", 7, 0) << Ff(ls_restr, key("type") == "t_improper_torsion", "number")
933  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_improper_torsion", "weight")
934  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_improper_torsion", "pdbx_restraint_function") << std::endl
935  << RM3(" PSEUDOROTATION ANGLES : ", 7, 0) << Ff(ls_restr, key("type") == "t_pseud_angle", "number")
936  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_pseud_angle", "weight")
937  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_pseud_angle", "pdbx_restraint_function") << std::endl
938  << RM3(" CHIRAL IMPROPER TORSION : ", 7, 0) << Ff(ls_restr, key("type") == "t_chiral_improper_torsion", "number")
939  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_chiral_improper_torsion", "weight")
940  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_chiral_improper_torsion", "pdbx_restraint_function") << std::endl
941  << RM3(" SUM OF OCCUPANCIES : ", 7, 0) << Ff(ls_restr, key("type") == "t_sum_occupancies", "number")
942  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_sum_occupancies", "weight")
943  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_sum_occupancies", "pdbx_restraint_function") << std::endl
944  << RM3(" UTILITY DISTANCES : ", 7, 0) << Ff(ls_restr, key("type") == "t_utility_distance", "number")
945  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_utility_distance", "weight")
946  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_utility_distance", "pdbx_restraint_function") << std::endl
947  << RM3(" UTILITY ANGLES : ", 7, 0) << Ff(ls_restr, key("type") == "t_utility_angle", "number")
948  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_utility_angle", "weight")
949  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_utility_angle", "pdbx_restraint_function") << std::endl
950  << RM3(" UTILITY TORSION : ", 7, 0) << Ff(ls_restr, key("type") == "t_utility_torsion", "number")
951  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_utility_torsion", "weight")
952  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_utility_torsion", "pdbx_restraint_function") << std::endl
953  << RM3(" IDEAL-DIST CONTACT TERM : ", 7, 0) << Ff(ls_restr, key("type") == "t_ideal_dist_contact", "number")
954  << SEP("; ", 7, 3) << Ff(ls_restr, key("type") == "t_ideal_dist_contact", "weight")
955  << SEP("; ", 12) << Fs(ls_restr, key("type") == "t_ideal_dist_contact", "pdbx_restraint_function") << std::endl
956 
957  << RM3("") << std::endl
958  << RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << std::endl
959  << RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, key("type") == "t_bond_d", "dev_ideal") << std::endl
960  << RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "t_angle_deg", "dev_ideal") << std::endl
961  << RM3(" PEPTIDE OMEGA TORSION ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "t_omega_torsion", "dev_ideal") << std::endl
962  << RM3(" OTHER TORSION ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "t_other_torsion", "dev_ideal") << std::endl;
963 
964  auto &tls = db["pdbx_refine_tls"];
965 
966  pdbFile << RM3("") << std::endl
967  << RM3(" TLS DETAILS") << std::endl
968  << RM3(" NUMBER OF TLS GROUPS : ") << (tls.size() ? std::to_string(tls.size()) : "NULL") << std::endl;
969 
970  for (auto t : tls)
971  {
972  std::string id = t["id"].as<std::string>();
973  auto g = db["pdbx_refine_tls_group"].find_first(key("refine_tls_id") == id);
974 
975  pdbFile << RM3("") << std::endl
976  << RM3(" TLS GROUP : ") << id << std::endl
977  << RM3(" SELECTION: ") << Fs(g, "selection_details") << std::endl;
978 
979  pdbFile << RM3(" ORIGIN FOR THE GROUP (A):", -9, 4) << Ff(t, "origin_x")
980  << SEP("", -9, 4) << Ff(t, "origin_y")
981  << SEP("", -9, 4) << Ff(t, "origin_z") << std::endl
982  << RM3(" T TENSOR") << std::endl
983  << RM3(" T11:", -9, 4) << Ff(t, "T[1][1]") << SEP(" T22:", -9, 4) << Ff(t, "T[2][2]") << std::endl
984  << RM3(" T33:", -9, 4) << Ff(t, "T[3][3]") << SEP(" T12:", -9, 4) << Ff(t, "T[1][2]") << std::endl
985  << RM3(" T13:", -9, 4) << Ff(t, "T[1][3]") << SEP(" T23:", -9, 4) << Ff(t, "T[2][3]") << std::endl
986  << RM3(" L TENSOR") << std::endl
987  << RM3(" L11:", -9, 4) << Ff(t, "L[1][1]") << SEP(" L22:", -9, 4) << Ff(t, "L[2][2]") << std::endl
988  << RM3(" L33:", -9, 4) << Ff(t, "L[3][3]") << SEP(" L12:", -9, 4) << Ff(t, "L[1][2]") << std::endl
989  << RM3(" L13:", -9, 4) << Ff(t, "L[1][3]") << SEP(" L23:", -9, 4) << Ff(t, "L[2][3]") << std::endl
990  << RM3(" S TENSOR") << std::endl
991  << RM3(" S11:", -9, 4) << Ff(t, "S[1][1]") << SEP(" S12:", -9, 4) << Ff(t, "S[1][2]") << SEP(" S13:", -9, 4) << Ff(t, "S[1][3]") << std::endl
992  << RM3(" S21:", -9, 4) << Ff(t, "S[2][1]") << SEP(" S22:", -9, 4) << Ff(t, "S[2][2]") << SEP(" S23:", -9, 4) << Ff(t, "S[2][3]") << std::endl
993  << RM3(" S31:", -9, 4) << Ff(t, "S[3][1]") << SEP(" S32:", -9, 4) << Ff(t, "S[3][2]") << SEP(" S33:", -9, 4) << Ff(t, "S[3][3]") << std::endl;
994  }
995 
996  pdbFile << RM3("") << std::endl;
997 }
998 
999 // --------------------------------------------------------------------
1000 
1001 void WriteRemark3CNS(std::ostream &pdbFile, const datablock &db)
1002 {
1003  auto refine = db["refine"].front();
1004  auto ls_shell = db["refine_ls_shell"].front();
1005  auto hist = db["refine_hist"].front();
1006  auto reflns = db["reflns"].front();
1007  auto analyze = db["refine_analyze"].front();
1008  auto &ls_restr = db["refine_ls_restr"];
1009  auto ls_restr_ncs = db["refine_ls_restr_ncs"].front();
1010  // auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
1011  // auto pdbx_refine = db["pdbx_refine"].front();
1012 
1013  pdbFile << RM3("") << std::endl
1014  << RM3("REFINEMENT TARGET : ") << Fs(refine, "pdbx_stereochemistry_target_values") << std::endl
1015  << RM3("") << std::endl
1016  << RM3(" DATA USED IN REFINEMENT.") << std::endl
1017  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
1018  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
1019  << RM3(" DATA CUTOFF (SIGMA(F)) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
1020  << RM3(" DATA CUTOFF HIGH (ABS(F)) : ", 6, 3) << Ff(refine, "pdbx_data_cutoff_high_absF") << std::endl
1021  << RM3(" DATA CUTOFF LOW (ABS(F)) : ", 7, 4) << Ff(refine, "pdbx_data_cutoff_low_absF") << std::endl
1022  << RM3(" COMPLETENESS (WORKING+TEST) (%) : ", 4, 1) << Ff(refine, "ls_percent_reflns_obs") << std::endl
1023  << RM3(" NUMBER OF REFLECTIONS : ", 12, 6) << Fi(refine, "ls_number_reflns_obs") << std::endl
1024 
1025  << RM3("") << std::endl
1026  << RM3(" FIT TO DATA USED IN REFINEMENT.") << std::endl
1027  << RM3(" CROSS-VALIDATION METHOD : ") << Fs(refine, "pdbx_ls_cross_valid_method") << std::endl
1028  << RM3(" FREE R VALUE TEST SET SELECTION : ") << Fs(refine, "pdbx_R_Free_selection_details") << std::endl
1029  // << RM3(" R VALUE (WORKING + TEST SET) : ", 7, 5) << Ff(refine, "ls_R_factor_obs") << std::endl
1030  << RM3(" R VALUE (WORKING SET) : ", 7, 3) << Ff(refine, "ls_R_factor_R_work") << std::endl
1031  << RM3(" FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free") << std::endl
1032  << RM3(" FREE R VALUE TEST SET SIZE (%) : ", 7, 3) << Ff(refine, "ls_percent_reflns_R_free") << std::endl
1033  << RM3(" FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(refine, "ls_number_reflns_R_free") << std::endl
1034  << RM3(" ESTIMATED ERROR OF FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free_error") << std::endl
1035 
1036  // << RM3("") << std::endl
1037  // << RM3(" FIT/AGREEMENT OF MODEL WITH ALL DATA.") << std::endl
1038  // << RM3(" R VALUE (WORKING + TEST SET, NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "R_factor_all_no_cutoff") << std::endl
1039  // << RM3(" R VALUE (WORKING SET, NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "R_factor_obs_no_cutoff") << std::endl
1040  // << RM3(" FREE R VALUE (NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "free_R_factor_no_cutoff") << std::endl
1041  // << RM3(" FREE R VALUE TEST SET SIZE (%, NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "free_R_val_test_set_size_perc_no_cutoff") << std::endl
1042  // << RM3(" FREE R VALUE TEST SET COUNT (NO CUTOFF) : ", 12, 6) << Fi(pdbx_refine, "free_R_val_test_set_ct_no_cutoff") << std::endl
1043  // << RM3(" TOTAL NUMBER OF REFLECTIONS (NO CUTOFF) : ", 12, 6) << Fi(refine, "ls_number_reflns_all") << std::endl
1044 
1045  << RM3("") << std::endl
1046  << RM3(" FIT IN THE HIGHEST RESOLUTION BIN.") << std::endl
1047  << RM3(" TOTAL NUMBER OF BINS USED : ", 12, 6) << Fi(ls_shell, "pdbx_total_number_of_bins_used") << std::endl
1048  << RM3(" BIN RESOLUTION RANGE HIGH (A) : ", 5, 2) << Ff(ls_shell, "d_res_high") << std::endl
1049  << RM3(" BIN RESOLUTION RANGE LOW (A) : ", 5, 2) << Ff(ls_shell, "d_res_low") << std::endl
1050  << RM3(" BIN COMPLETENESS (WORKING+TEST) (%) : ", 6, 2) << Ff(ls_shell, "percent_reflns_obs") << std::endl
1051  << RM3(" REFLECTIONS IN BIN (WORKING SET) : ", 12, 6) << Fi(ls_shell, "number_reflns_R_work") << std::endl
1052  << RM3(" BIN R VALUE (WORKING SET) : ", 8, 4) << Ff(ls_shell, "R_factor_R_work") << std::endl
1053  << RM3(" BIN FREE R VALUE : ", 8, 4) << Ff(ls_shell, "R_factor_R_free") << std::endl
1054  << RM3(" BIN FREE R VALUE TEST SET SIZE (%) : ", 6, 2) << Ff(ls_shell, "percent_reflns_R_free") << std::endl
1055  << RM3(" BIN FREE R VALUE TEST SET COUNT : ", 12, 7) << Fi(ls_shell, "number_reflns_R_free") << std::endl
1056  << RM3(" ESTIMATED ERROR OF BIN FREE R VALUE : ", 7, 3) << Ff(ls_shell, "R_factor_R_free_error") << std::endl
1057 
1058  << RM3("") << std::endl
1059  << RM3(" NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.") << std::endl
1060  << RM3(" PROTEIN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_protein") << std::endl
1061  << RM3(" NUCLEIC ACID ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_nucleic_acid") << std::endl
1062  << RM3(" HETEROGEN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_ligand") << std::endl
1063  << RM3(" SOLVENT ATOMS : ", 12, 6) << Fi(hist, "number_atoms_solvent") << std::endl
1064 
1065  << RM3("") << std::endl
1066  << RM3(" B VALUES.") << std::endl
1067  << RM3(" B VALUE TYPE : ") << Fs(refine, "pdbx_TLS_residual_ADP_flag") << std::endl
1068  << RM3(" FROM WILSON PLOT (A**2) : ", 7, 2) << Ff(reflns, "B_iso_Wilson_estimate") << std::endl
1069  << RM3(" MEAN B VALUE (OVERALL, A**2) : ", 7, 2) << Ff(refine, "B_iso_mean") << std::endl
1070 
1071  << RM3(" OVERALL ANISOTROPIC B VALUE.") << std::endl
1072  << RM3(" B11 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[1][1]") << std::endl
1073  << RM3(" B22 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[2][2]") << std::endl
1074  << RM3(" B33 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[3][3]") << std::endl
1075  << RM3(" B12 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[1][2]") << std::endl
1076  << RM3(" B13 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[1][3]") << std::endl
1077  << RM3(" B23 (A**2) : ", -8, 5) << Ff(refine, "aniso_B[2][3]") << std::endl
1078 
1079  << RM3("") << std::endl
1080  << RM3(" ESTIMATED COORDINATE ERROR.") << std::endl
1081  << RM3(" ESD FROM LUZZATI PLOT (A) : ", 7, 2) << Ff(analyze, "Luzzati_coordinate_error_obs") << std::endl
1082  << RM3(" ESD FROM SIGMAA (A) : ", 7, 2) << Ff(analyze, "Luzzati_sigma_a_obs") << std::endl
1083  << RM3(" LOW RESOLUTION CUTOFF (A) : ", 7, 2) << Ff(analyze, "Luzzati_d_res_low_obs") << std::endl
1084 
1085  << RM3("") << std::endl
1086  << RM3(" CROSS-VALIDATED ESTIMATED COORDINATE ERROR.") << std::endl
1087  << RM3(" ESD FROM C-V LUZZATI PLOT (A) : ", 7, 2) << Ff(analyze, "Luzzati_coordinate_error_free") << std::endl
1088  << RM3(" ESD FROM C-V SIGMAA (A) : ", 7, 2) << Ff(analyze, "Luzzati_sigma_a_free") << std::endl
1089 
1090  << RM3("") << std::endl
1091  << RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << std::endl
1092  << RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, key("type") == "c_bond_d", "dev_ideal") << std::endl
1093  << RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "c_angle_deg", "dev_ideal") << std::endl
1094  << RM3(" DIHEDRAL ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "c_dihedral_angle_d", "dev_ideal") << std::endl
1095  << RM3(" IMPROPER ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "c_improper_angle_d", "dev_ideal") << std::endl
1096 
1097  << RM3("") << std::endl
1098  << RM3(" ISOTROPIC THERMAL MODEL : ") << Fs(refine, "pdbx_isotropic_thermal_model") << std::endl
1099 
1100  << RM3("") << std::endl
1101  << RM3(" ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA") << std::endl
1102  << RM3(" MAIN-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "c_mcbond_it", "dev_ideal") << SEP("; ", 7, 3)
1103  << Ff(ls_restr, key("type") == "c_mcbond_it", "dev_ideal_target") << std::endl
1104  << RM3(" MAIN-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "c_mcangle_it", "dev_ideal") << SEP("; ", 7, 3)
1105  << Ff(ls_restr, key("type") == "c_mcangle_it", "dev_ideal_target") << std::endl
1106  << RM3(" SIDE-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "c_scbond_it", "dev_ideal") << SEP("; ", 7, 3)
1107  << Ff(ls_restr, key("type") == "c_scbond_it", "dev_ideal_target") << std::endl
1108  << RM3(" SIDE-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "c_scangle_it", "dev_ideal") << SEP("; ", 7, 3)
1109  << Ff(ls_restr, key("type") == "c_scangle_it", "dev_ideal_target") << std::endl
1110 
1111  << RM3("") << std::endl
1112  << RM3(" BULK SOLVENT MODELING.") << std::endl
1113  << RM3(" METHOD USED : ") << Fs(refine, "solvent_model_details") << std::endl
1114  << RM3(" KSOL : ", 5, 2) << Ff(refine, "solvent_model_param_ksol") << std::endl
1115  << RM3(" BSOL : ", 5, 2) << Ff(refine, "solvent_model_param_bsol") << std::endl
1116 
1117  << RM3("") << std::endl
1118  << RM3(" NCS MODEL : ") << Fs(ls_restr_ncs, "ncs_model_details") << std::endl
1119 
1120  << RM3("") << std::endl
1121  << RM3(" NCS RESTRAINTS. RMS SIGMA/WEIGHT") << std::endl
1122 
1123  // TODO: using only group 1 here, should this be fixed???
1124  << RM3(" GROUP 1 POSITIONAL (A) : ", 4, 2) << Ff(ls_restr_ncs, "rms_dev_position") << SEP("; ", 6, 2)
1125  << Ff(ls_restr_ncs, "weight_position") << SEP("; ", 6, 2) << std::endl
1126  << RM3(" GROUP 1 B-FACTOR (A**2) : ", 4, 2) << Ff(ls_restr_ncs, "rms_dev_B_iso") << SEP("; ", 6, 2)
1127  << Ff(ls_restr_ncs, "weight_B_iso") << SEP("; ", 6, 2) << std::endl
1128 
1129  // TODO: using only files from serial_no 1 here
1130  // << RM3("") << std::endl
1131  // << RM3(" PARAMETER FILE 1 : ") << Fs(pdbx_xplor_file, "param_file") << std::endl
1132  // << RM3(" TOPOLOGY FILE 1 : ") << Fs(pdbx_xplor_file, "topol_file") << std::endl
1133 
1134  << RM3("") << std::endl;
1135 }
1136 
1137 // --------------------------------------------------------------------
1138 
1139 void WriteRemark3Refmac(std::ostream &pdbFile, const datablock &db)
1140 {
1141  auto refine = db["refine"].front();
1142  auto ls_shell = db["refine_ls_shell"].front();
1143  auto hist = db["refine_hist"].front();
1144  auto reflns = db["reflns"].front();
1145  // auto analyze = db["refine_analyze"].front();
1146  auto &ls_restr = db["refine_ls_restr"];
1147  // auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
1148 
1149  auto c = [](const char *t) -> condition
1150  { return key("type") == t; };
1151 
1152  pdbFile << RM3("") << std::endl
1153  << RM3("REFINEMENT TARGET : ") << Fs(refine, "pdbx_stereochemistry_target_values") << std::endl
1154  << RM3("") << std::endl
1155  << RM3(" DATA USED IN REFINEMENT.") << std::endl
1156  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
1157  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
1158  << RM3(" DATA CUTOFF (SIGMA(F)) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
1159  << RM3(" COMPLETENESS FOR RANGE (%) : ", 5, 2) << Ff(refine, "ls_percent_reflns_obs") << std::endl
1160  << RM3(" NUMBER OF REFLECTIONS : ", 12, 6) << Fi(refine, "ls_number_reflns_obs") << std::endl
1161 
1162  << RM3("") << std::endl
1163  << RM3(" FIT TO DATA USED IN REFINEMENT.") << std::endl
1164  << RM3(" CROSS-VALIDATION METHOD : ") << Fs(refine, "pdbx_ls_cross_valid_method") << std::endl
1165  << RM3(" FREE R VALUE TEST SET SELECTION : ") << Fs(refine, "pdbx_R_Free_selection_details") << std::endl
1166  << RM3(" R VALUE (WORKING + TEST SET) : ", 7, 5) << Ff(refine, "ls_R_factor_obs") << std::endl
1167  << RM3(" R VALUE (WORKING SET) : ", 7, 5) << Ff(refine, "ls_R_factor_R_work") << std::endl
1168  << RM3(" FREE R VALUE : ", 7, 5) << Ff(refine, "ls_R_factor_R_free") << std::endl
1169  << RM3(" FREE R VALUE TEST SET SIZE (%) : ", 7, 1) << Ff(refine, "ls_percent_reflns_R_free") << std::endl
1170  << RM3(" FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(refine, "ls_number_reflns_R_free") << std::endl
1171  << RM3(" ESTIMATED ERROR OF FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free_error") << std::endl
1172 
1173  << RM3("") << std::endl
1174  << RM3(" FIT IN THE HIGHEST RESOLUTION BIN.") << std::endl
1175  << RM3(" TOTAL NUMBER OF BINS USED : ") << Fi(ls_shell, "pdbx_total_number_of_bins_used") << std::endl
1176  << RM3(" BIN RESOLUTION RANGE HIGH (A) : ", 5, 3) << Ff(ls_shell, "d_res_high") << std::endl
1177  << RM3(" BIN RESOLUTION RANGE LOW (A) : ", 5, 3) << Ff(ls_shell, "d_res_low") << std::endl
1178  << RM3(" REFLECTION IN BIN (WORKING SET) : ") << Fi(ls_shell, "number_reflns_R_work") << std::endl
1179  << RM3(" BIN COMPLETENESS (WORKING+TEST) (%) : ", 5, 2) << Ff(ls_shell, "percent_reflns_obs") << std::endl
1180  << RM3(" BIN R VALUE (WORKING SET) : ", 7, 3) << Ff(ls_shell, "R_factor_R_work") << std::endl
1181  << RM3(" BIN FREE R VALUE SET COUNT : ") << Fi(ls_shell, "number_reflns_R_free") << std::endl
1182  << RM3(" BIN FREE R VALUE : ", 7, 3) << Ff(ls_shell, "R_factor_R_free") << std::endl
1183 
1184  << RM3("") << std::endl
1185  << RM3(" NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.") << std::endl
1186  << RM3(" PROTEIN ATOMS : ") << Fi(hist, "pdbx_number_atoms_protein") << std::endl
1187  << RM3(" NUCLEIC ACID ATOMS : ") << Fi(hist, "pdbx_number_atoms_nucleic_acid") << std::endl
1188  << RM3(" HETEROGEN ATOMS : ") << Fi(hist, "pdbx_number_atoms_ligand") << std::endl
1189  << RM3(" SOLVENT ATOMS : ") << Fi(hist, "number_atoms_solvent") << std::endl
1190 
1191  << RM3("") << std::endl
1192  << RM3(" B VALUES.") << std::endl
1193  << RM3(" B VALUE TYPE : ") << Fs(refine, "pdbx_TLS_residual_ADP_flag") << std::endl
1194  << RM3(" FROM WILSON PLOT (A**2) : ", 8, 3) << Ff(reflns, "B_iso_Wilson_estimate") << std::endl
1195  << RM3(" MEAN B VALUE (OVERALL, A**2) : ", 8, 3) << Ff(refine, "B_iso_mean") << std::endl
1196 
1197  << RM3(" OVERALL ANISOTROPIC B VALUE.") << std::endl
1198  << RM3(" B11 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][1]") << std::endl
1199  << RM3(" B22 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][2]") << std::endl
1200  << RM3(" B33 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[3][3]") << std::endl
1201  << RM3(" B12 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][2]") << std::endl
1202  << RM3(" B13 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][3]") << std::endl
1203  << RM3(" B23 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][3]") << std::endl
1204 
1205  << RM3("") << std::endl
1206  << RM3(" ESTIMATED OVERALL COORDINATE ERROR.") << std::endl
1207  << RM3(" ESU BASED ON R VALUE (A): ", 6, 3) << Ff(refine, "pdbx_overall_ESU_R") << std::endl
1208  << RM3(" ESU BASED ON FREE R VALUE (A): ", 6, 3) << Ff(refine, "pdbx_overall_ESU_R_Free") << std::endl
1209  << RM3(" ESU BASED ON MAXIMUM LIKELIHOOD (A): ", 6, 3) << Ff(refine, "overall_SU_ML") << std::endl
1210  << RM3(" ESU FOR B VALUES BASED ON MAXIMUM LIKELIHOOD (A**2): ", 6, 3) << Ff(refine, "overall_SU_B") << std::endl
1211 
1212  << RM3("") << std::endl
1213  << RM3(" CORRELATION COEFFICIENTS.") << std::endl
1214  << RM3(" CORRELATION COEFFICIENT FO-FC : ", 6, 3) << Ff(refine, "correlation_coeff_Fo_to_Fc") << std::endl
1215  << RM3(" CORRELATION COEFFICIENT FO-FC FREE : ", 6, 3) << Ff(refine, "correlation_coeff_Fo_to_Fc_free") << std::endl
1216 
1217  << RM3("") << std::endl
1218  << RM3(" RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT") << std::endl
1219  << RM3(" BOND LENGTHS REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_bond_refined_d"), "number") << SEP(" ;", -6, 3)
1220  << Ff(ls_restr, c("r_bond_refined_d"), "dev_ideal") << SEP(" ;", -6, 3)
1221  << Ff(ls_restr, c("r_bond_refined_d"), "dev_ideal_target") << std::endl
1222  << RM3(" BOND LENGTHS OTHERS (A): ", -5) << Fi(ls_restr, c("r_bond_other_d"), "number") << SEP(" ;", -6, 3)
1223  << Ff(ls_restr, c("r_bond_other_d"), "dev_ideal") << SEP(" ;", -6, 3)
1224  << Ff(ls_restr, c("r_bond_other_d"), "dev_ideal_target") << std::endl
1225  << RM3(" BOND ANGLES REFINED ATOMS (DEGREES): ", -5) << Fi(ls_restr, c("r_angle_refined_deg"), "number") << SEP(" ;", -6, 3)
1226  << Ff(ls_restr, c("r_angle_refined_deg"), "dev_ideal") << SEP(" ;", -6, 3)
1227  << Ff(ls_restr, c("r_angle_refined_deg"), "dev_ideal_target") << std::endl
1228  << RM3(" BOND ANGLES OTHERS (DEGREES): ", -5) << Fi(ls_restr, c("r_angle_other_deg"), "number") << SEP(" ;", -6, 3)
1229  << Ff(ls_restr, c("r_angle_other_deg"), "dev_ideal") << SEP(" ;", -6, 3)
1230  << Ff(ls_restr, c("r_angle_other_deg"), "dev_ideal_target") << std::endl
1231  << RM3(" TORSION ANGLES, PERIOD 1 (DEGREES): ", -5) << Fi(ls_restr, c("r_dihedral_angle_1_deg"), "number") << SEP(" ;", -6, 3)
1232  << Ff(ls_restr, c("r_dihedral_angle_1_deg"), "dev_ideal") << SEP(" ;", -6, 3)
1233  << Ff(ls_restr, c("r_dihedral_angle_1_deg"), "dev_ideal_target") << std::endl
1234  << RM3(" TORSION ANGLES, PERIOD 2 (DEGREES): ", -5) << Fi(ls_restr, c("r_dihedral_angle_2_deg"), "number") << SEP(" ;", -6, 3)
1235  << Ff(ls_restr, c("r_dihedral_angle_2_deg"), "dev_ideal") << SEP(" ;", -6, 3)
1236  << Ff(ls_restr, c("r_dihedral_angle_2_deg"), "dev_ideal_target") << std::endl
1237  << RM3(" TORSION ANGLES, PERIOD 3 (DEGREES): ", -5) << Fi(ls_restr, c("r_dihedral_angle_3_deg"), "number") << SEP(" ;", -6, 3)
1238  << Ff(ls_restr, c("r_dihedral_angle_3_deg"), "dev_ideal") << SEP(" ;", -6, 3)
1239  << Ff(ls_restr, c("r_dihedral_angle_3_deg"), "dev_ideal_target") << std::endl
1240  << RM3(" TORSION ANGLES, PERIOD 4 (DEGREES): ", -5) << Fi(ls_restr, c("r_dihedral_angle_4_deg"), "number") << SEP(" ;", -6, 3)
1241  << Ff(ls_restr, c("r_dihedral_angle_4_deg"), "dev_ideal") << SEP(" ;", -6, 3)
1242  << Ff(ls_restr, c("r_dihedral_angle_4_deg"), "dev_ideal_target") << std::endl
1243  << RM3(" CHIRAL-CENTER RESTRAINTS (A**3): ", -5) << Fi(ls_restr, c("r_chiral_restr"), "number") << SEP(" ;", -6, 3)
1244  << Ff(ls_restr, c("r_chiral_restr"), "dev_ideal") << SEP(" ;", -6, 3)
1245  << Ff(ls_restr, c("r_chiral_restr"), "dev_ideal_target") << std::endl
1246  << RM3(" GENERAL PLANES REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_gen_planes_refined"), "number") << SEP(" ;", -6, 3)
1247  << Ff(ls_restr, c("r_gen_planes_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1248  << Ff(ls_restr, c("r_gen_planes_refined"), "dev_ideal_target") << std::endl
1249  << RM3(" GENERAL PLANES OTHERS (A): ", -5) << Fi(ls_restr, c("r_gen_planes_other"), "number") << SEP(" ;", -6, 3)
1250  << Ff(ls_restr, c("r_gen_planes_other"), "dev_ideal") << SEP(" ;", -6, 3)
1251  << Ff(ls_restr, c("r_gen_planes_other"), "dev_ideal_target") << std::endl
1252  << RM3(" NON-BONDED CONTACTS REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_nbd_refined"), "number") << SEP(" ;", -6, 3)
1253  << Ff(ls_restr, c("r_nbd_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1254  << Ff(ls_restr, c("r_nbd_refined"), "dev_ideal_target") << std::endl
1255  << RM3(" NON-BONDED CONTACTS OTHERS (A): ", -5) << Fi(ls_restr, c("r_nbd_other"), "number") << SEP(" ;", -6, 3)
1256  << Ff(ls_restr, c("r_nbd_other"), "dev_ideal") << SEP(" ;", -6, 3)
1257  << Ff(ls_restr, c("r_nbd_other"), "dev_ideal_target") << std::endl
1258  << RM3(" NON-BONDED TORSION REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_nbtor_refined"), "number") << SEP(" ;", -6, 3)
1259  << Ff(ls_restr, c("r_nbtor_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1260  << Ff(ls_restr, c("r_nbtor_refined"), "dev_ideal_target") << std::endl
1261  << RM3(" NON-BONDED TORSION OTHERS (A): ", -5) << Fi(ls_restr, c("r_nbtor_other"), "number") << SEP(" ;", -6, 3)
1262  << Ff(ls_restr, c("r_nbtor_other"), "dev_ideal") << SEP(" ;", -6, 3)
1263  << Ff(ls_restr, c("r_nbtor_other"), "dev_ideal_target") << std::endl
1264  << RM3(" H-BOND (X...Y) REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_xyhbond_nbd_refined"), "number") << SEP(" ;", -6, 3)
1265  << Ff(ls_restr, c("r_xyhbond_nbd_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1266  << Ff(ls_restr, c("r_xyhbond_nbd_refined"), "dev_ideal_target") << std::endl
1267  << RM3(" H-BOND (X...Y) OTHERS (A): ", -5) << Fi(ls_restr, c("r_xyhbond_nbd_other"), "number") << SEP(" ;", -6, 3)
1268  << Ff(ls_restr, c("r_xyhbond_nbd_other"), "dev_ideal") << SEP(" ;", -6, 3)
1269  << Ff(ls_restr, c("r_xyhbond_nbd_other"), "dev_ideal_target") << std::endl
1270  << RM3(" POTENTIAL METAL-ION REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_metal_ion_refined"), "number") << SEP(" ;", -6, 3)
1271  << Ff(ls_restr, c("r_metal_ion_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1272  << Ff(ls_restr, c("r_metal_ion_refined"), "dev_ideal_target") << std::endl
1273  << RM3(" POTENTIAL METAL-ION OTHERS (A): ", -5) << Fi(ls_restr, c("r_metal_ion_other"), "number") << SEP(" ;", -6, 3)
1274  << Ff(ls_restr, c("r_metal_ion_other"), "dev_ideal") << SEP(" ;", -6, 3)
1275  << Ff(ls_restr, c("r_metal_ion_other"), "dev_ideal_target") << std::endl
1276  << RM3(" SYMMETRY VDW REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_symmetry_vdw_refined"), "number") << SEP(" ;", -6, 3)
1277  << Ff(ls_restr, c("r_symmetry_vdw_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1278  << Ff(ls_restr, c("r_symmetry_vdw_refined"), "dev_ideal_target") << std::endl
1279  << RM3(" SYMMETRY VDW OTHERS (A): ", -5) << Fi(ls_restr, c("r_symmetry_vdw_other"), "number") << SEP(" ;", -6, 3)
1280  << Ff(ls_restr, c("r_symmetry_vdw_other"), "dev_ideal") << SEP(" ;", -6, 3)
1281  << Ff(ls_restr, c("r_symmetry_vdw_other"), "dev_ideal_target") << std::endl
1282  << RM3(" SYMMETRY H-BOND REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_symmetry_hbond_refined"), "number") << SEP(" ;", -6, 3)
1283  << Ff(ls_restr, c("r_symmetry_hbond_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1284  << Ff(ls_restr, c("r_symmetry_hbond_refined"), "dev_ideal_target") << std::endl
1285  << RM3(" SYMMETRY H-BOND OTHERS (A): ", -5) << Fi(ls_restr, c("r_symmetry_hbond_other"), "number") << SEP(" ;", -6, 3)
1286  << Ff(ls_restr, c("r_symmetry_hbond_other"), "dev_ideal") << SEP(" ;", -6, 3)
1287  << Ff(ls_restr, c("r_symmetry_hbond_other"), "dev_ideal_target") << std::endl
1288  << RM3(" SYMMETRY METAL-ION REFINED ATOMS (A): ", -5) << Fi(ls_restr, c("r_symmetry_metal_ion_refined"), "number") << SEP(" ;", -6, 3)
1289  << Ff(ls_restr, c("r_symmetry_metal_ion_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1290  << Ff(ls_restr, c("r_symmetry_metal_ion_refined"), "dev_ideal_target") << std::endl
1291  << RM3(" SYMMETRY METAL-ION OTHERS (A): ", -5) << Fi(ls_restr, c("r_symmetry_metal_ion_other"), "number") << SEP(" ;", -6, 3)
1292  << Ff(ls_restr, c("r_symmetry_metal_ion_other"), "dev_ideal") << SEP(" ;", -6, 3)
1293  << Ff(ls_restr, c("r_symmetry_metal_ion_other"), "dev_ideal_target") << std::endl
1294 
1295  << RM3("") << std::endl
1296  << RM3(" ISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT") << std::endl
1297  << RM3(" MAIN-CHAIN BOND REFINED ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_mcbond_it"), "number") << SEP(" ;", -6, 3)
1298  << Ff(ls_restr, c("r_mcbond_it"), "dev_ideal") << SEP(" ;", -6, 3)
1299  << Ff(ls_restr, c("r_mcbond_it"), "dev_ideal_target") << std::endl
1300  << RM3(" MAIN-CHAIN BOND OTHER ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_mcbond_other"), "number") << SEP(" ;", -6, 3)
1301  << Ff(ls_restr, c("r_mcbond_other"), "dev_ideal") << SEP(" ;", -6, 3)
1302  << Ff(ls_restr, c("r_mcbond_other"), "dev_ideal_target") << std::endl
1303  << RM3(" MAIN-CHAIN ANGLE REFINED ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_mcangle_it"), "number") << SEP(" ;", -6, 3)
1304  << Ff(ls_restr, c("r_mcangle_it"), "dev_ideal") << SEP(" ;", -6, 3)
1305  << Ff(ls_restr, c("r_mcangle_it"), "dev_ideal_target") << std::endl
1306  << RM3(" MAIN-CHAIN ANGLE OTHER ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_mcangle_other"), "number") << SEP(" ;", -6, 3)
1307  << Ff(ls_restr, c("r_mcangle_other"), "dev_ideal") << SEP(" ;", -6, 3)
1308  << Ff(ls_restr, c("r_mcangle_other"), "dev_ideal_target") << std::endl
1309  << RM3(" SIDE-CHAIN BOND REFINED ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_scbond_it"), "number") << SEP(" ;", -6, 3)
1310  << Ff(ls_restr, c("r_scbond_it"), "dev_ideal") << SEP(" ;", -6, 3)
1311  << Ff(ls_restr, c("r_scbond_it"), "dev_ideal_target") << std::endl
1312  << RM3(" SIDE-CHAIN BOND OTHER ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_scbond_other"), "number") << SEP(" ;", -6, 3)
1313  << Ff(ls_restr, c("r_scbond_other"), "dev_ideal") << SEP(" ;", -6, 3)
1314  << Ff(ls_restr, c("r_scbond_other"), "dev_ideal_target") << std::endl
1315  << RM3(" SIDE-CHAIN ANGLE REFINED ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_scangle_it"), "number") << SEP(" ;", -6, 3)
1316  << Ff(ls_restr, c("r_scangle_it"), "dev_ideal") << SEP(" ;", -6, 3)
1317  << Ff(ls_restr, c("r_scangle_it"), "dev_ideal_target") << std::endl
1318  << RM3(" SIDE-CHAIN ANGLE OTHER ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_scangle_other"), "number") << SEP(" ;", -6, 3)
1319  << Ff(ls_restr, c("r_scangle_other"), "dev_ideal") << SEP(" ;", -6, 3)
1320  << Ff(ls_restr, c("r_scangle_other"), "dev_ideal_target") << std::endl
1321  << RM3(" LONG RANGE B REFINED ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_long_range_B_refined"), "number") << SEP(" ;", -6, 3)
1322  << Ff(ls_restr, c("r_long_range_B_refined"), "dev_ideal") << SEP(" ;", -6, 3)
1323  << Ff(ls_restr, c("r_long_range_B_refined"), "dev_ideal_target") << std::endl
1324  << RM3(" LONG RANGE B OTHER ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_long_range_B_other"), "number") << SEP(" ;", -6, 3)
1325  << Ff(ls_restr, c("r_long_range_B_other"), "dev_ideal") << SEP(" ;", -6, 3)
1326  << Ff(ls_restr, c("r_long_range_B_other"), "dev_ideal_target") << std::endl
1327 
1328  << RM3("") << std::endl
1329  << RM3(" ANISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT") << std::endl
1330  << RM3(" RIGID-BOND RESTRAINTS (A**2): ", -5) << Fi(ls_restr, c("r_rigid_bond_restr"), "number") << SEP(" ;", -6, 3)
1331  << Ff(ls_restr, c("r_rigid_bond_restr"), "dev_ideal") << SEP(" ;", -6, 3)
1332  << Ff(ls_restr, c("r_rigid_bond_restr"), "dev_ideal_target") << std::endl
1333  << RM3(" SPHERICITY; FREE ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_sphericity_free"), "number") << SEP(" ;", -6, 3)
1334  << Ff(ls_restr, c("r_sphericity_free"), "dev_ideal") << SEP(" ;", -6, 3)
1335  << Ff(ls_restr, c("r_sphericity_free"), "dev_ideal_target") << std::endl
1336  << RM3(" SPHERICITY; BONDED ATOMS (A**2): ", -5) << Fi(ls_restr, c("r_sphericity_bonded"), "number") << SEP(" ;", -6, 3)
1337  << Ff(ls_restr, c("r_sphericity_bonded"), "dev_ideal") << SEP(" ;", -6, 3)
1338  << Ff(ls_restr, c("r_sphericity_bonded"), "dev_ideal_target") << std::endl
1339 
1340  << RM3("") << std::endl
1341  << RM3(" NCS RESTRAINTS STATISTICS") << std::endl;
1342 
1343  auto &ncs_dom = db["struct_ncs_dom"];
1344  if (ncs_dom.empty())
1345  pdbFile << RM3(" NUMBER OF DIFFERENT NCS GROUPS : NULL") << std::endl;
1346  else
1347  {
1348  std::set<std::string> ncs_groups;
1349  for (auto i : ncs_dom)
1350  ncs_groups.insert(i["pdbx_ens_id"].as<std::string>());
1351 
1352  pdbFile << RM3(" NUMBER OF DIFFERENT NCS GROUPS : ") << ncs_groups.size() << std::endl;
1353 
1354  for (auto ens_id : ncs_groups)
1355  {
1356  auto lim = db["struct_ncs_dom_lim"].find(key("pdbx_ens_id") == ens_id);
1357 
1358  std::set<std::string> chains;
1359  std::set<int> component_ids;
1360 
1361  for (auto l : lim)
1362  {
1363  chains.insert(l["beg_auth_asym_id"].as<std::string>());
1364  component_ids.insert(l["pdbx_component_id"].as<int>());
1365  }
1366 
1367  pdbFile << RM3("") << std::endl
1368  << RM3(" NCS GROUP NUMBER : ") << ens_id << std::endl
1369  << RM3(" CHAIN NAMES : ") << join(chains, " ") << std::endl
1370  << RM3(" NUMBER OF COMPONENTS NCS GROUP : ") << component_ids.size() << std::endl
1371  << RM3(" COMPONENT C SSSEQI TO C SSSEQI CODE") << std::endl;
1372 
1373  for (auto l : lim)
1374  {
1375  pdbFile << RM3(" ", -2) << Fi(l, "pdbx_component_id")
1376  << SEP(" ", -5) << Fs(l, "beg_auth_asym_id")
1377  << SEP(" ", -5) << Fi(l, "beg_auth_seq_id")
1378  << SEP(" ", -5) << Fs(l, "end_auth_asym_id")
1379  << SEP(" ", -5) << Fi(l, "end_auth_seq_id")
1380  << SEP(" ", -5) << Fs(l, "pdbx_refine_code")
1381  << std::endl;
1382  }
1383 
1384  pdbFile << RM3(" GROUP CHAIN COUNT RMS WEIGHT") << std::endl;
1385  for (auto l : db["refine_ls_restr_ncs"].find(key("pdbx_ens_id") == ens_id))
1386  {
1387  std::string type = l["pdbx_type"].as<std::string>();
1388  to_upper(type);
1389 
1390  std::string unit;
1391  if (ends_with(type, "POSITIONAL"))
1392  unit = " (A): ";
1393  else if (ends_with(type, "THERMAL"))
1394  unit = " (A**2): ";
1395  else
1396  unit = " : ";
1397 
1398  pdbFile << RM3(" ", 18) << type
1399  << SEP("", -2) << Fs(l, "pdbx_ens_id")
1400  << SEP(" ", 1) << Fs(l, "pdbx_auth_asym_id")
1401  << SEP(unit.c_str(), -6) << Fi(l, "pdbx_number")
1402  << SEP(" ;", -6, 3) << Ff(l, "rms_dev_position")
1403  << SEP(" ;", -6, 3) << Ff(l, "weight_position")
1404  << std::endl;
1405  }
1406  }
1407  }
1408 
1409  // TODO: add twin information
1410 
1411  // { R"(TWIN DETAILS)", "", {} },
1412  // { R"(NUMBER OF TWIN DOMAINS)", "", {} },
1413 
1414  auto &tls = db["pdbx_refine_tls"];
1415 
1416  pdbFile << RM3("") << std::endl
1417  << RM3(" TLS DETAILS") << std::endl
1418  << RM3(" NUMBER OF TLS GROUPS : ") << (tls.size() ? std::to_string(tls.size()) : "NULL") << std::endl;
1419 
1420  for (auto t : tls)
1421  {
1422  std::string id = t["id"].as<std::string>();
1423  auto g = db["pdbx_refine_tls_group"].find(key("refine_tls_id") == id);
1424 
1425  pdbFile << RM3("") << std::endl
1426  << RM3(" TLS GROUP : ") << id << std::endl
1427  << RM3(" NUMBER OF COMPONENTS GROUP : ") << g.size() << std::endl
1428  << RM3(" COMPONENTS C SSSEQI TO C SSSEQI") << std::endl;
1429 
1430  for (auto gi : g)
1431  {
1432  pdbFile << RM3(" RESIDUE RANGE : ") << Fs(gi, "beg_auth_asym_id")
1433  << SEP("", -6) << Fi(gi, "beg_auth_seq_id")
1434  << SEP("", -9) << Fs(gi, "end_auth_asym_id")
1435  << SEP("", -6) << Fi(gi, "end_auth_seq_id")
1436  << std::endl;
1437  }
1438 
1439  pdbFile << RM3(" ORIGIN FOR THE GROUP (A):", -9, 4) << Ff(t, "origin_x")
1440  << SEP("", -9, 4) << Ff(t, "origin_y")
1441  << SEP("", -9, 4) << Ff(t, "origin_z") << std::endl
1442  << RM3(" T TENSOR") << std::endl
1443  << RM3(" T11:", -9, 4) << Ff(t, "T[1][1]") << SEP(" T22:", -9, 4) << Ff(t, "T[2][2]") << std::endl
1444  << RM3(" T33:", -9, 4) << Ff(t, "T[3][3]") << SEP(" T12:", -9, 4) << Ff(t, "T[1][2]") << std::endl
1445  << RM3(" T13:", -9, 4) << Ff(t, "T[1][3]") << SEP(" T23:", -9, 4) << Ff(t, "T[2][3]") << std::endl
1446  << RM3(" L TENSOR") << std::endl
1447  << RM3(" L11:", -9, 4) << Ff(t, "L[1][1]") << SEP(" L22:", -9, 4) << Ff(t, "L[2][2]") << std::endl
1448  << RM3(" L33:", -9, 4) << Ff(t, "L[3][3]") << SEP(" L12:", -9, 4) << Ff(t, "L[1][2]") << std::endl
1449  << RM3(" L13:", -9, 4) << Ff(t, "L[1][3]") << SEP(" L23:", -9, 4) << Ff(t, "L[2][3]") << std::endl
1450  << RM3(" S TENSOR") << std::endl
1451  << RM3(" S11:", -9, 4) << Ff(t, "S[1][1]") << SEP(" S12:", -9, 4) << Ff(t, "S[1][2]") << SEP(" S13:", -9, 4) << Ff(t, "S[1][3]") << std::endl
1452  << RM3(" S21:", -9, 4) << Ff(t, "S[2][1]") << SEP(" S22:", -9, 4) << Ff(t, "S[2][2]") << SEP(" S23:", -9, 4) << Ff(t, "S[2][3]") << std::endl
1453  << RM3(" S31:", -9, 4) << Ff(t, "S[3][1]") << SEP(" S32:", -9, 4) << Ff(t, "S[3][2]") << SEP(" S33:", -9, 4) << Ff(t, "S[3][3]") << std::endl;
1454  }
1455 
1456  pdbFile << RM3("") << std::endl
1457  << RM3(" BULK SOLVENT MODELLING.") << std::endl
1458  << RM3(" METHOD USED : ") << Fs(refine, "solvent_model_details") << std::endl
1459  << RM3(" PARAMETERS FOR MASK CALCULATION") << std::endl
1460  << RM3(" VDW PROBE RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_vdw_probe_radii") << std::endl
1461  << RM3(" ION PROBE RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_ion_probe_radii") << std::endl
1462  << RM3(" SHRINKAGE RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_shrinkage_radii") << std::endl
1463 
1464  << RM3("") << std::endl;
1465 }
1466 
1467 void WriteRemark3Shelxl(std::ostream &pdbFile, const datablock &db)
1468 {
1469  auto refine = db["refine"].front();
1470  // auto ls_shell = db["refine_ls_shell"].front();
1471  auto refine_hist = db["refine_hist"].front();
1472  // auto reflns = db["reflns"].front();
1473  auto refine_analyze = db["refine_analyze"].front();
1474  auto &ls_restr = db["refine_ls_restr"];
1475  // auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
1476  auto pdbx_refine = db["pdbx_refine"].front();
1477 
1478  auto c = [](const char *t) -> condition
1479  { return key("type") == t; };
1480 
1481  pdbFile << RM3("") << std::endl
1482  << RM3(" DATA USED IN REFINEMENT.") << std::endl
1483  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
1484  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
1485  << RM3(" DATA CUTOFF (SIGMA(F)) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
1486  << RM3(" COMPLETENESS FOR RANGE (%) : ", 5, 2) << Ff(refine, "ls_percent_reflns_obs") << std::endl
1487  << RM3(" CROSS-VALIDATION METHOD : ") << Fs(refine, "pdbx_ls_cross_valid_method") << std::endl
1488  << RM3(" FREE R VALUE TEST SET SELECTION : ") << Fs(refine, "pdbx_R_Free_selection_details") << std::endl
1489 
1490  << RM3("") << std::endl
1491  << RM3(" FIT TO DATA USED IN REFINEMENT (NO CUTOFF).") << std::endl
1492  << RM3(" R VALUE (WORKING + TEST SET, NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "R_factor_all_no_cutoff") << std::endl
1493  << RM3(" R VALUE (WORKING SET, NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "R_factor_obs_no_cutoff") << std::endl
1494  << RM3(" FREE R VALUE (NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "free_R_factor_no_cutoff") << std::endl
1495  << RM3(" FREE R VALUE TEST SET SIZE (%, NO CUTOFF) : ", 7, 3) << Ff(pdbx_refine, "free_R_val_test_set_size_perc_no_cutoff") << std::endl
1496  << RM3(" FREE R VALUE TEST SET COUNT (NO CUTOFF) : ", 12, 6) << Fi(pdbx_refine, "free_R_val_test_set_ct_no_cutoff") << std::endl
1497  << RM3(" TOTAL NUMBER OF REFLECTIONS (NO CUTOFF) : ", 12, 6) << Fi(refine, "ls_number_reflns_all") << std::endl
1498 
1499  << RM3("") << std::endl
1500  << RM3(" FIT/AGREEMENT OF MODEL FOR DATA WITH F>4SIG(F).") << std::endl
1501  << RM3(" R VALUE (WORKING + TEST SET, F>4SIG(F)) : ", 7, 3) << Ff(pdbx_refine, "R_factor_all_4sig_cutoff") << std::endl
1502  << RM3(" R VALUE (WORKING SET, F>4SIG(F)) : ", 7, 3) << Ff(pdbx_refine, "R_factor_obs_4sig_cutoff") << std::endl
1503  << RM3(" FREE R VALUE (F>4SIG(F)) : ", 7, 3) << Ff(pdbx_refine, "free_R_factor_4sig_cutoff") << std::endl
1504  << RM3(" FREE R VALUE TEST SET SIZE (%, F>4SIG(F)) : ", 7, 3) << Ff(pdbx_refine, "free_R_val_test_set_size_perc_4sig_cutoff") << std::endl
1505  << RM3(" FREE R VALUE TEST SET COUNT (F>4SIG(F)) : ") << Fi(pdbx_refine, "free_R_val_test_set_ct_4sig_cutoff") << std::endl
1506  << RM3(" TOTAL NUMBER OF REFLECTIONS (F>4SIG(F)) : ") << Fi(pdbx_refine, "number_reflns_obs_4sig_cutoff") << std::endl
1507 
1508  << RM3("") << std::endl
1509  << RM3(" NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.") << std::endl
1510  << RM3(" PROTEIN ATOMS : ") << Fi(refine_hist, "pdbx_number_atoms_protein") << std::endl
1511  << RM3(" NUCLEIC ACID ATOMS : ") << Fi(refine_hist, "pdbx_number_atoms_nucleic_acid") << std::endl
1512  << RM3(" HETEROGEN ATOMS : ") << Fi(refine_hist, "pdbx_number_atoms_ligand") << std::endl
1513  << RM3(" SOLVENT ATOMS : ") << Fi(refine_hist, "number_atoms_solvent") << std::endl
1514 
1515  << RM3("") << std::endl
1516  << RM3(" MODEL REFINEMENT.") << std::endl
1517  << RM3(" OCCUPANCY SUM OF NON-HYDROGEN ATOMS : ", 7, 3) << Ff(refine_analyze, "occupancy_sum_non_hydrogen") << std::endl
1518  << RM3(" OCCUPANCY SUM OF HYDROGEN ATOMS : ", 7, 3) << Ff(refine_analyze, "occupancy_sum_hydrogen") << std::endl
1519  << RM3(" NUMBER OF DISCRETELY DISORDERED RESIDUES : ") << Fi(refine_analyze, "number_disordered_residues") << std::endl
1520  << RM3(" NUMBER OF LEAST-SQUARES PARAMETERS : ") << Fi(refine, "ls_number_parameters") << std::endl
1521  << RM3(" NUMBER OF RESTRAINTS : ") << Fi(refine, "ls_number_restraints") << std::endl
1522 
1523  << RM3("") << std::endl
1524  << RM3(" RMS DEVIATIONS FROM RESTRAINT TARGET VALUES.") << std::endl
1525  << RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, c("s_bond_d"), "dev_ideal") << std::endl
1526  << RM3(" ANGLE DISTANCES (A) : ", 7, 3) << Ff(ls_restr, c("s_angle_d"), "dev_ideal") << std::endl
1527  << RM3(" SIMILAR DISTANCES (NO TARGET VALUES) (A) : ", 7, 3) << Ff(ls_restr, c("s_similar_dist"), "dev_ideal") << std::endl
1528  << RM3(" DISTANCES FROM RESTRAINT PLANES (A) : ", 7, 3) << Ff(ls_restr, c("s_from_restr_planes"), "dev_ideal") << std::endl
1529  << RM3(" ZERO CHIRAL VOLUMES (A**3) : ", 7, 3) << Ff(ls_restr, c("s_zero_chiral_vol"), "dev_ideal") << std::endl
1530  << RM3(" NON-ZERO CHIRAL VOLUMES (A**3) : ", 7, 3) << Ff(ls_restr, c("s_non_zero_chiral_vol"), "dev_ideal") << std::endl
1531  << RM3(" ANTI-BUMPING DISTANCE RESTRAINTS (A) : ", 7, 3) << Ff(ls_restr, c("s_anti_bump_dis_restr"), "dev_ideal") << std::endl
1532  << RM3(" RIGID-BOND ADP COMPONENTS (A**2) : ", 7, 3) << Ff(ls_restr, c("s_rigid_bond_adp_cmpnt"), "dev_ideal") << std::endl
1533  << RM3(" SIMILAR ADP COMPONENTS (A**2) : ", 7, 3) << Ff(ls_restr, c("s_similar_adp_cmpnt"), "dev_ideal") << std::endl
1534  << RM3(" APPROXIMATELY ISOTROPIC ADPS (A**2) : ", 7, 3) << Ff(ls_restr, c("s_approx_iso_adps"), "dev_ideal") << std::endl
1535 
1536  << RM3("") << std::endl
1537  << RM3(" BULK SOLVENT MODELING.") << std::endl
1538  << RM3(" METHOD USED: ") << Fs(refine, "solvent_model_details") << std::endl
1539 
1540  << RM3("") << std::endl
1541  << RM3(" STEREOCHEMISTRY TARGET VALUES : ") << Fs(refine, "pdbx_stereochemistry_target_values") << std::endl
1542  << RM3(" SPECIAL CASE: ") << Fs(refine, "pdbx_stereochem_target_val_spec_case") << std::endl
1543 
1544  << RM3("") << std::endl;
1545 }
1546 
1547 void WriteRemark3Phenix(std::ostream &pdbFile, const datablock &db)
1548 {
1549  auto refine = db["refine"].front();
1550  // auto ls_shell = db["refine_ls_shell"].front();
1551  // auto hist = db["refine_hist"].front();
1552  auto reflns = db["reflns"].front();
1553  // auto analyze = db["refine_analyze"].front();
1554  auto &ls_restr = db["refine_ls_restr"];
1555  // auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
1556  auto pdbx_reflns_twin = db["pdbx_reflns_twin"].front();
1557 
1558  auto c = [](const char *t) -> condition
1559  { return key("type") == t; };
1560 
1561  pdbFile << RM3("") << std::endl
1562  << RM3(" REFINEMENT TARGET : ") << Fs(refine, "pdbx_stereochemistry_target_values") << std::endl
1563  << RM3("") << std::endl
1564  << RM3(" DATA USED IN REFINEMENT.") << std::endl
1565  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
1566  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
1567  << RM3(" MIN(FOBS/SIGMA_FOBS) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
1568  << RM3(" COMPLETENESS FOR RANGE (%) : ", 5, 2) << Ff(refine, "ls_percent_reflns_obs") << std::endl
1569  << RM3(" NUMBER OF REFLECTIONS : ", 12, 6) << Fi(refine, "ls_number_reflns_obs") << std::endl
1570  << RM3("") << std::endl
1571  << RM3(" FIT TO DATA USED IN REFINEMENT.") << std::endl
1572  << RM3(" R VALUE (WORKING + TEST SET) : ", 7, 5) << Ff(refine, "ls_R_factor_obs") << std::endl
1573  << RM3(" R VALUE (WORKING SET) : ", 7, 5) << Ff(refine, "ls_R_factor_R_work") << std::endl
1574  << RM3(" FREE R VALUE : ", 7, 5) << Ff(refine, "ls_R_factor_R_free") << std::endl
1575  << RM3(" FREE R VALUE TEST SET SIZE (%) : ", 7, 3) << Ff(refine, "ls_percent_reflns_R_free") << std::endl
1576  << RM3(" FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(refine, "ls_number_reflns_R_free") << std::endl
1577 
1578  << RM3("") << std::endl
1579  << RM3(" FIT TO DATA USED IN REFINEMENT (IN BINS).") << std::endl
1580  << RM3(" BIN RESOLUTION RANGE COMPL. NWORK NFREE RWORK RFREE") << std::endl;
1581 
1582  int bin = 1;
1583  std::vector<row_handle> bins;
1584  for (auto r : db["refine_ls_shell"])
1585  bins.push_back(r);
1586  // reverse(bins.begin(), bins.end());
1587  try
1588  {
1589  sort(bins.begin(), bins.end(), [](row_handle a, row_handle b) -> bool
1590  { return a["d_res_high"].as<float>() > b["d_res_high"].as<float>(); });
1591  }
1592  catch (...)
1593  {
1594  }
1595 
1596  for (auto r : bins)
1597  {
1598  float d_res_low, d_res_high, percent_reflns_obs, R_factor_R_work, R_factor_R_free;
1599  int number_reflns_R_work, number_reflns_R_free;
1600 
1601  tie(d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work,
1602  number_reflns_R_free, R_factor_R_work, R_factor_R_free) =
1603  r.get("d_res_low", "d_res_high", "percent_reflns_obs", "number_reflns_R_work",
1604  "number_reflns_R_free", "R_factor_R_work", "R_factor_R_free");
1605 
1606  percent_reflns_obs /= 100;
1607 
1608  pdbFile << RM3(" ") << cif::format("%3d %7.4f - %7.4f %4.2f %8d %5d %6.4f %6.4f", bin++, d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work, number_reflns_R_free, R_factor_R_work, R_factor_R_free) << std::endl;
1609  }
1610 
1611  pdbFile << RM3("") << std::endl
1612  << RM3(" BULK SOLVENT MODELLING.") << std::endl
1613  << RM3(" METHOD USED : ") << Fs(refine, "solvent_model_details") << std::endl
1614  << RM3(" SOLVENT RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_vdw_probe_radii") << std::endl
1615  << RM3(" SHRINKAGE RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_shrinkage_radii") << std::endl
1616  << RM3(" K_SOL : ", 5, 2) << Ff(refine, "solvent_model_param_ksol") << std::endl
1617  << RM3(" B_SOL : ", 5, 2) << Ff(refine, "solvent_model_param_bsol") << std::endl
1618 
1619  << RM3("") << std::endl
1620  << RM3(" ERROR ESTIMATES.") << std::endl
1621  << RM3(" COORDINATE ERROR (MAXIMUM-LIKELIHOOD BASED) : ", 6, 3) << Ff(refine, "overall_SU_ML") << std::endl
1622  << RM3(" PHASE ERROR (DEGREES, MAXIMUM-LIKELIHOOD BASED) : ", 6, 3) << Ff(refine, "pdbx_overall_phase_error") << std::endl
1623 
1624  << RM3("") << std::endl
1625  << RM3(" B VALUES.") << std::endl
1626  << RM3(" B VALUE TYPE : ") << Fs(refine, "pdbx_TLS_residual_ADP_flag") << std::endl
1627  << RM3(" FROM WILSON PLOT (A**2) : ", 7, 4) << Ff(reflns, "B_iso_Wilson_estimate") << std::endl
1628  << RM3(" MEAN B VALUE (OVERALL, A**2) : ", 7, 4) << Ff(refine, "B_iso_mean") << std::endl
1629  << RM3(" OVERALL ANISOTROPIC B VALUE.") << std::endl
1630  << RM3(" B11 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][1]") << std::endl
1631  << RM3(" B22 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][2]") << std::endl
1632  << RM3(" B33 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[3][3]") << std::endl
1633  << RM3(" B12 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][2]") << std::endl
1634  << RM3(" B13 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][3]") << std::endl
1635  << RM3(" B23 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][3]") << std::endl
1636 
1637  << RM3("") << std::endl
1638  << RM3(" TWINNING INFORMATION.") << std::endl
1639  << RM3(" FRACTION: ") << Fs(pdbx_reflns_twin, "fraction") << std::endl
1640  << RM3(" OPERATOR: ") << Fs(pdbx_reflns_twin, "operator") << std::endl
1641 
1642  << RM3("") << std::endl
1643  << RM3(" DEVIATIONS FROM IDEAL VALUES.") << std::endl
1644  << RM3(" RMSD COUNT") << std::endl
1645  << RM3(" BOND : ", -6, 3) << Ff(ls_restr, c("f_bond_d"), "dev_ideal") << SEP(" ", -7)
1646  << Fi(ls_restr, c("f_bond_d"), "number")
1647  << std::endl
1648  << RM3(" ANGLE : ", -6, 3) << Ff(ls_restr, c("f_angle_d"), "dev_ideal") << SEP(" ", -7)
1649  << Fi(ls_restr, c("f_angle_d"), "number")
1650  << std::endl
1651  << RM3(" CHIRALITY : ", -6, 3) << Ff(ls_restr, c("f_chiral_restr"), "dev_ideal") << SEP(" ", -7)
1652  << Fi(ls_restr, c("f_chiral_restr"), "number")
1653  << std::endl
1654  << RM3(" PLANARITY : ", -6, 3) << Ff(ls_restr, c("f_plane_restr"), "dev_ideal") << SEP(" ", -7)
1655  << Fi(ls_restr, c("f_plane_restr"), "number")
1656  << std::endl
1657  << RM3(" DIHEDRAL : ", -6, 3) << Ff(ls_restr, c("f_dihedral_angle_d"), "dev_ideal") << SEP(" ", -7)
1658  << Fi(ls_restr, c("f_dihedral_angle_d"), "number")
1659  << std::endl;
1660 
1661  auto &tls = db["pdbx_refine_tls"];
1662 
1663  pdbFile << RM3("") << std::endl
1664  << RM3(" TLS DETAILS") << std::endl
1665  << RM3(" NUMBER OF TLS GROUPS : ") << (tls.size() ? std::to_string(tls.size()) : "NULL") << std::endl;
1666 
1667  for (auto t : tls)
1668  {
1669  std::string id = t["id"].as<std::string>();
1670 
1671  auto pdbx_refine_tls_group = db["pdbx_refine_tls_group"].find_first(key("refine_tls_id") == id);
1672 
1673  pdbFile << RM3(" TLS GROUP : ") << id << std::endl
1674  << RM3(" SELECTION: ") << Fs(pdbx_refine_tls_group, "selection_details") << std::endl
1675  << RM3(" ORIGIN FOR THE GROUP (A):", -9, 4) << Ff(t, "origin_x")
1676  << SEP("", -9, 4) << Ff(t, "origin_y")
1677  << SEP("", -9, 4) << Ff(t, "origin_z") << std::endl
1678  << RM3(" T TENSOR") << std::endl
1679  << RM3(" T11:", -9, 4) << Ff(t, "T[1][1]") << SEP(" T22:", -9, 4) << Ff(t, "T[2][2]") << std::endl
1680  << RM3(" T33:", -9, 4) << Ff(t, "T[3][3]") << SEP(" T12:", -9, 4) << Ff(t, "T[1][2]") << std::endl
1681  << RM3(" T13:", -9, 4) << Ff(t, "T[1][3]") << SEP(" T23:", -9, 4) << Ff(t, "T[2][3]") << std::endl
1682  << RM3(" L TENSOR") << std::endl
1683  << RM3(" L11:", -9, 4) << Ff(t, "L[1][1]") << SEP(" L22:", -9, 4) << Ff(t, "L[2][2]") << std::endl
1684  << RM3(" L33:", -9, 4) << Ff(t, "L[3][3]") << SEP(" L12:", -9, 4) << Ff(t, "L[1][2]") << std::endl
1685  << RM3(" L13:", -9, 4) << Ff(t, "L[1][3]") << SEP(" L23:", -9, 4) << Ff(t, "L[2][3]") << std::endl
1686  << RM3(" S TENSOR") << std::endl
1687  << RM3(" S11:", -9, 4) << Ff(t, "S[1][1]") << SEP(" S12:", -9, 4) << Ff(t, "S[1][2]") << SEP(" S13:", -9, 4) << Ff(t, "S[1][3]") << std::endl
1688  << RM3(" S21:", -9, 4) << Ff(t, "S[2][1]") << SEP(" S22:", -9, 4) << Ff(t, "S[2][2]") << SEP(" S23:", -9, 4) << Ff(t, "S[2][3]") << std::endl
1689  << RM3(" S31:", -9, 4) << Ff(t, "S[3][1]") << SEP(" S32:", -9, 4) << Ff(t, "S[3][2]") << SEP(" S33:", -9, 4) << Ff(t, "S[3][3]") << std::endl;
1690  }
1691 
1692  pdbFile << RM3("") << std::endl
1693  << RM3(" NCS DETAILS") << std::endl;
1694 
1695  auto &ncs_dom = db["struct_ncs_dom"];
1696  if (ncs_dom.empty())
1697  pdbFile << RM3(" NUMBER OF NCS GROUPS : NULL") << std::endl;
1698  else
1699  {
1700  std::set<std::string> ncs_groups;
1701  for (auto i : ncs_dom)
1702  ncs_groups.insert(i["pdbx_ens_id"].as<std::string>());
1703 
1704  pdbFile << RM3(" NUMBER OF NCS GROUPS : ") << ncs_groups.size() << std::endl;
1705  //
1706  // for (auto ens_id: ncs_groups)
1707  // {
1708  // auto lim = db["struct_ncs_dom_lim"].find(key("pdbx_ens_id") == ens_id);
1709  //
1710  // set<std::string> chains;
1711  // set<int> component_ids;
1712  //
1713  // for (auto l: lim)
1714  // {
1715  // chains.insert(l["beg_auth_asym_id"]);
1716  // component_ids.insert(l["pdbx_component_id"].as<int>());
1717  // }
1718  //
1719  // pdbFile << RM3("") << std::endl
1720  // << RM3(" NCS GROUP NUMBER : ") << ens_id << std::endl
1721  // << RM3(" CHAIN NAMES : ") << join(chains, " ") << std::endl
1722  // << RM3(" NUMBER OF COMPONENTS NCS GROUP : ") << component_ids.size() << std::endl
1723  // << RM3(" COMPONENT C SSSEQI TO C SSSEQI CODE") << std::endl;
1724  //
1725  // for (auto l: lim)
1726  // {
1727  // pdbFile << RM3(" ", -2) << Fi(l, "pdbx_component_id")
1728  // << SEP(" ", -5) << Fs(l, "beg_auth_asym_id")
1729  // << SEP(" ", -5) << Fi(l, "beg_auth_seq_id")
1730  // << SEP(" ", -5) << Fs(l, "end_auth_asym_id")
1731  // << SEP(" ", -5) << Fi(l, "end_auth_seq_id")
1732  // << SEP(" ", -5) << Fs(l, "pdbx_refine_code")
1733  // << std::endl;
1734  // }
1735  //
1736  // pdbFile << RM3(" GROUP CHAIN COUNT RMS WEIGHT") << std::endl;
1737  // for (auto l: db["refine_ls_restr_ncs"].find(key("pdbx_ens_id") == ens_id))
1738  // {
1739  // std::string type = l["pdbx_type"];
1740  // to_upper(type);
1741  //
1742  // std::string unit;
1743  // if (ends_with(type, "POSITIONAL"))
1744  // unit = " (A): ";
1745  // else if (ends_with(type, "THERMAL"))
1746  // unit = " (A**2): ";
1747  // else
1748  // unit = " : ";
1749  //
1750  // pdbFile << RM3(" ", 18) << type
1751  // << SEP("", -2) << Fi(l, "pdbx_ens_id")
1752  // << SEP(" ", 1) << Fs(l, "pdbx_auth_asym_id")
1753  // << SEP(unit.c_str(), -6) << Fi(l, "pdbx_number")
1754  // << SEP(" ;", -6, 3) << Ff(l, "rms_dev_position")
1755  // << SEP(" ;", -6, 3) << Ff(l, "weight_position")
1756  // << std::endl;
1757  // }
1758  // }
1759  }
1760 
1761  // pdbFile << RM3("") << std::endl
1762  // << RM3(" BULK SOLVENT MODELLING.") << std::endl
1763  // << RM3(" METHOD USED : ") << Fs(refine, "solvent_model_details") << std::endl
1764  // << RM3(" PARAMETERS FOR MASK CALCULATION") << std::endl
1765  // << RM3(" VDW PROBE RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_vdw_probe_radii") << std::endl
1766  // << RM3(" ION PROBE RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_ion_probe_radii") << std::endl
1767  // << RM3(" SHRINKAGE RADIUS : ", 5, 2) << Ff(refine, "pdbx_solvent_shrinkage_radii") << std::endl
1768  //
1769  // << RM3("") << std::endl;
1770 
1771  pdbFile << RM3("") << std::endl;
1772 }
1773 
1774 void WriteRemark3XPlor(std::ostream &pdbFile, const datablock &db)
1775 {
1776  auto refine = db["refine"].front();
1777  auto ls_shell = db["refine_ls_shell"].front();
1778  auto hist = db["refine_hist"].front();
1779  auto reflns = db["reflns"].front();
1780  auto analyze = db["refine_analyze"].front();
1781  auto &ls_restr = db["refine_ls_restr"];
1782  auto ls_restr_ncs = db["refine_ls_restr_ncs"].front();
1783  auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
1784 
1785  pdbFile << RM3("") << std::endl
1786  << RM3(" DATA USED IN REFINEMENT.") << std::endl
1787  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
1788  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
1789  << RM3(" DATA CUTOFF (SIGMA(F)) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
1790  << RM3(" DATA CUTOFF HIGH (ABS(F)) : ", 6, 3) << Ff(refine, "pdbx_data_cutoff_high_absF") << std::endl
1791  << RM3(" DATA CUTOFF LOW (ABS(F)) : ", 6, 3) << Ff(refine, "pdbx_data_cutoff_low_absF") << std::endl
1792  << RM3(" COMPLETENESS (WORKING+TEST) (%) : ", 5, 2) << Ff(refine, "ls_percent_reflns_obs") << std::endl
1793  << RM3(" NUMBER OF REFLECTIONS : ", 12, 6) << Fi(refine, "ls_number_reflns_obs") << std::endl
1794 
1795  << RM3("") << std::endl
1796  << RM3(" FIT TO DATA USED IN REFINEMENT.") << std::endl
1797  << RM3(" CROSS-VALIDATION METHOD : ") << Fs(refine, "pdbx_ls_cross_valid_method") << std::endl
1798  << RM3(" FREE R VALUE TEST SET SELECTION : ") << Fs(refine, "pdbx_R_Free_selection_details") << std::endl
1799  << RM3(" R VALUE (WORKING SET) : ", 7, 3) << Ff(refine, "ls_R_factor_R_work") << std::endl
1800  << RM3(" FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free") << std::endl
1801  << RM3(" FREE R VALUE TEST SET SIZE (%) : ", 7, 3) << Ff(refine, "ls_percent_reflns_R_free") << std::endl
1802  << RM3(" FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(refine, "ls_number_reflns_R_free") << std::endl
1803  << RM3(" ESTIMATED ERROR OF FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free_error") << std::endl
1804 
1805  << RM3("") << std::endl
1806  << RM3(" FIT IN THE HIGHEST RESOLUTION BIN.") << std::endl
1807  << RM3(" TOTAL NUMBER OF BINS USED : ", 12, 6) << Fi(ls_shell, "pdbx_total_number_of_bins_used") << std::endl
1808  << RM3(" BIN RESOLUTION RANGE HIGH (A) : ", 5, 2) << Ff(ls_shell, "d_res_high") << std::endl
1809  << RM3(" BIN RESOLUTION RANGE LOW (A) : ", 5, 2) << Ff(ls_shell, "d_res_low") << std::endl
1810  << RM3(" BIN COMPLETENESS (WORKING+TEST) (%) : ", 5, 1) << Ff(ls_shell, "percent_reflns_obs") << std::endl
1811  << RM3(" REFLECTIONS IN BIN (WORKING SET) : ", 12, 6) << Fi(ls_shell, "number_reflns_R_work") << std::endl
1812  << RM3(" BIN R VALUE (WORKING SET) : ", 7, 3) << Ff(ls_shell, "R_factor_R_work") << std::endl
1813  << RM3(" BIN FREE R VALUE : ", 7, 3) << Ff(ls_shell, "R_factor_R_free") << std::endl
1814  << RM3(" BIN FREE R VALUE TEST SET SIZE (%) : ", 5, 1) << Ff(ls_shell, "percent_reflns_R_free") << std::endl
1815  << RM3(" BIN FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(ls_shell, "number_reflns_R_free") << std::endl
1816  << RM3(" ESTIMATED ERROR OF BIN FREE R VALUE : ", 7, 3) << Ff(ls_shell, "R_factor_R_free_error") << std::endl
1817 
1818  << RM3("") << std::endl
1819  << RM3(" NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.") << std::endl
1820  << RM3(" PROTEIN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_protein") << std::endl
1821  << RM3(" NUCLEIC ACID ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_nucleic_acid") << std::endl
1822  << RM3(" HETEROGEN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_ligand") << std::endl
1823  << RM3(" SOLVENT ATOMS : ", 12, 6) << Fi(hist, "number_atoms_solvent") << std::endl
1824 
1825  << RM3("") << std::endl
1826  << RM3(" B VALUES.") << std::endl
1827  << RM3(" FROM WILSON PLOT (A**2) : ", 7, 2) << Ff(reflns, "B_iso_Wilson_estimate") << std::endl
1828  << RM3(" MEAN B VALUE (OVERALL, A**2) : ", 7, 2) << Ff(refine, "B_iso_mean") << std::endl
1829 
1830  << RM3(" OVERALL ANISOTROPIC B VALUE.") << std::endl
1831  << RM3(" B11 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][1]") << std::endl
1832  << RM3(" B22 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][2]") << std::endl
1833  << RM3(" B33 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[3][3]") << std::endl
1834  << RM3(" B12 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][2]") << std::endl
1835  << RM3(" B13 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][3]") << std::endl
1836  << RM3(" B23 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][3]") << std::endl
1837 
1838  << RM3("") << std::endl
1839  << RM3(" ESTIMATED COORDINATE ERROR.") << std::endl
1840  << RM3(" ESD FROM LUZZATI PLOT (A) : ", 7, 2) << Ff(analyze, "Luzzati_coordinate_error_obs") << std::endl
1841  << RM3(" ESD FROM SIGMAA (A) : ", 7, 2) << Ff(analyze, "Luzzati_sigma_a_obs") << std::endl
1842  << RM3(" LOW RESOLUTION CUTOFF (A) : ", 7, 2) << Ff(analyze, "Luzzati_d_res_low_obs") << std::endl
1843 
1844  << RM3("") << std::endl
1845  << RM3(" CROSS-VALIDATED ESTIMATED COORDINATE ERROR.") << std::endl
1846  << RM3(" ESD FROM C-V LUZZATI PLOT (A) : ", 7, 2) << Ff(analyze, "Luzzati_coordinate_error_free") << std::endl
1847  << RM3(" ESD FROM C-V SIGMAA (A) : ", 7, 2) << Ff(analyze, "Luzzati_sigma_a_free") << std::endl
1848 
1849  << RM3("") << std::endl
1850  << RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << std::endl
1851  << RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, key("type") == "x_bond_d", "dev_ideal") << std::endl
1852  << RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "x_angle_deg", "dev_ideal") << std::endl
1853  << RM3(" DIHEDRAL ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "x_dihedral_angle_d", "dev_ideal") << std::endl
1854  << RM3(" IMPROPER ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, key("type") == "x_improper_angle_d", "dev_ideal") << std::endl
1855 
1856  << RM3("") << std::endl
1857  << RM3(" ISOTROPIC THERMAL MODEL : ") << Fs(refine, "pdbx_isotropic_thermal_model") << std::endl
1858 
1859  << RM3("") << std::endl
1860  << RM3(" ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA") << std::endl
1861  << RM3(" MAIN-CHAIN BOND (A**2) : ", 6, 2) << Ff(ls_restr, key("type") == "x_mcbond_it", "dev_ideal") << SEP("; ", 6, 2)
1862  << Ff(ls_restr, key("type") == "x_mcbond_it", "dev_ideal_target") << std::endl
1863  << RM3(" MAIN-CHAIN ANGLE (A**2) : ", 6, 2) << Ff(ls_restr, key("type") == "x_mcangle_it", "dev_ideal") << SEP("; ", 6, 2)
1864  << Ff(ls_restr, key("type") == "x_mcangle_it", "dev_ideal_target") << std::endl
1865  << RM3(" SIDE-CHAIN BOND (A**2) : ", 6, 2) << Ff(ls_restr, key("type") == "x_scbond_it", "dev_ideal") << SEP("; ", 6, 2)
1866  << Ff(ls_restr, key("type") == "x_scbond_it", "dev_ideal_target") << std::endl
1867  << RM3(" SIDE-CHAIN ANGLE (A**2) : ", 6, 2) << Ff(ls_restr, key("type") == "x_scangle_it", "dev_ideal") << SEP("; ", 6, 2)
1868  << Ff(ls_restr, key("type") == "x_scangle_it", "dev_ideal_target") << std::endl
1869  << RM3("") << std::endl
1870  << RM3(" NCS MODEL : ") << Fs(ls_restr_ncs, "ncs_model_details") << std::endl
1871 
1872  << RM3("") << std::endl
1873  << RM3(" NCS RESTRAINTS. RMS SIGMA/WEIGHT") << std::endl
1874 
1875  // TODO: using only group 1 here, should this be fixed???
1876  << RM3(" GROUP 1 POSITIONAL (A) : ", 4, 2) << Ff(ls_restr_ncs, "rms_dev_position") << SEP("; ", 6, 2)
1877  << Ff(ls_restr_ncs, "weight_position") << SEP("; ", 6, 2) << std::endl
1878  << RM3(" GROUP 1 B-FACTOR (A**2) : ", 4, 2) << Ff(ls_restr_ncs, "rms_dev_B_iso") << SEP("; ", 6, 2)
1879  << Ff(ls_restr_ncs, "weight_B_iso") << SEP("; ", 6, 2) << std::endl
1880 
1881  // TODO: using only files from serial_no 1 here
1882  << RM3("") << std::endl
1883  << RM3(" PARAMETER FILE 1 : ") << Fs(pdbx_xplor_file, "param_file") << std::endl
1884  << RM3(" TOPOLOGY FILE 1 : ") << Fs(pdbx_xplor_file, "topol_file") << std::endl
1885 
1886  << RM3("") << std::endl;
1887 }
1888 
1889 void WriteRemark3NuclSQ(std::ostream &pdbFile, const datablock &db)
1890 {
1891  auto refine = db["refine"].front();
1892  auto pdbx_refine = db["pdbx_refine"].front();
1893  auto hist = db["refine_hist"].front();
1894  auto reflns = db["reflns"].front();
1895  auto analyze = db["refine_analyze"].front();
1896  auto &ls_restr = db["refine_ls_restr"];
1897 
1898  pdbFile << RM3("") << std::endl
1899  << RM3(" DATA USED IN REFINEMENT.") << std::endl
1900 
1901  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
1902  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
1903  << RM3(" DATA CUTOFF (SIGMA(F)) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
1904  << RM3(" COMPLETENESS FOR RANGE (%) : ", 5, 2) << Ff(refine, "ls_percent_reflns_obs") << std::endl
1905  << RM3(" NUMBER OF REFLECTIONS : ", 12, 6) << Fi(refine, "ls_number_reflns_obs") << std::endl
1906 
1907  << RM3("") << std::endl
1908  << RM3(" FIT TO DATA USED IN REFINEMENT.") << std::endl
1909  << RM3(" CROSS-VALIDATION METHOD : ") << Fs(refine, "pdbx_ls_cross_valid_method") << std::endl
1910  << RM3(" FREE R VALUE TEST SET SELECTION : ") << Fs(refine, "pdbx_R_Free_selection_details") << std::endl
1911  << RM3(" R VALUE (WORKING + TEST SET) : ", 7, 3) << Ff(refine, "ls_R_factor_obs") << std::endl
1912  << RM3(" R VALUE (WORKING SET) : ", 7, 3) << Ff(refine, "ls_R_factor_R_work") << std::endl
1913  << RM3(" FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free") << std::endl
1914  << RM3(" FREE R VALUE TEST SET SIZE (%) : ", 7, 3) << Ff(refine, "ls_percent_reflns_R_free") << std::endl
1915  << RM3(" FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(refine, "ls_number_reflns_R_free") << std::endl
1916 
1917  << RM3("") << std::endl
1918  << RM3(" FIT/AGREEMENT OF MODEL WITH ALL DATA.") << std::endl
1919  << RM3(" R VALUE (WORKING + TEST SET, NO CUTOFF) : ") << Fs(refine, "ls_R_factor_all") << std::endl
1920  << RM3(" R VALUE (WORKING SET, NO CUTOFF) : ") << Fs(pdbx_refine, "R_factor_obs_no_cutoff") << std::endl
1921 
1922  << RM3(" FREE R VALUE (NO CUTOFF) : ") << Fs(pdbx_refine, "free_R_factor_no_cutoff") << std::endl
1923  << RM3(" FREE R VALUE TEST SET SIZE (%, NO CUTOFF) : ") << Fs(pdbx_refine, "free_R_val_test_set_size_perc_no_cutoff") << std::endl
1924  << RM3(" FREE R VALUE TEST SET COUNT (NO CUTOFF) : ") << Fs(pdbx_refine, "free_R_val_test_set_ct_no_cutoff") << std::endl
1925  << RM3(" TOTAL NUMBER OF REFLECTIONS (NO CUTOFF) : ") << Fs(refine, "ls_number_reflns_all") << std::endl
1926 
1927  << RM3("") << std::endl
1928  << RM3(" NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.") << std::endl
1929  << RM3(" PROTEIN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_protein") << std::endl
1930  << RM3(" NUCLEIC ACID ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_nucleic_acid") << std::endl
1931  << RM3(" HETEROGEN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_ligand") << std::endl
1932  << RM3(" SOLVENT ATOMS : ", 12, 6) << Fi(hist, "number_atoms_solvent") << std::endl
1933  // << RM3(" ALL ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_protein") << std::endl
1934 
1935  << RM3("") << std::endl
1936  << RM3(" B VALUES.") << std::endl
1937  // << RM3(" B VALUE TYPE : ", 7, 2) << Fs(refine, "pdbx_TLS_residual_ADP_flag") << std::endl
1938  << RM3(" FROM WILSON PLOT (A**2) : ", 7, 2) << Ff(reflns, "B_iso_Wilson_estimate") << std::endl
1939  << RM3(" MEAN B VALUE (OVERALL, A**2) : ", 7, 2) << Ff(refine, "B_iso_mean") << std::endl
1940  << RM3(" OVERALL ANISOTROPIC B VALUE.") << std::endl
1941  << RM3(" B11 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][1]") << std::endl
1942  << RM3(" B22 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][2]") << std::endl
1943  << RM3(" B33 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[3][3]") << std::endl
1944  << RM3(" B12 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][2]") << std::endl
1945  << RM3(" B13 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][3]") << std::endl
1946  << RM3(" B23 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][3]") << std::endl
1947 
1948  << RM3("") << std::endl
1949  << RM3(" ESTIMATED COORDINATE ERROR.") << std::endl
1950  << RM3(" ESD FROM LUZZATI PLOT (A) : ", 7, 2) << Ff(analyze, "Luzzati_coordinate_error_obs") << std::endl
1951  << RM3(" ESD FROM SIGMAA (A) : ", 7, 2) << Ff(analyze, "Luzzati_sigma_a_obs") << std::endl
1952  << RM3(" LOW RESOLUTION CUTOFF (A) : ", 7, 2) << Ff(analyze, "Luzzati_d_res_low_obs") << std::endl
1953 
1954  << RM3("") << std::endl
1955  << RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << std::endl
1956  << RM3(" DISTANCE RESTRAINTS. RMS SIGMA") << std::endl
1957  << RM3(" SUGAR-BASE BOND DISTANCE (A) : ", 7, 3) << Ff(ls_restr, key("type") == "n_sugar_bond_d", "dev_ideal") << " ; "
1958  << Ff(ls_restr, key("type") == "n_sugar_bond_d", "dev_ideal_target") << std::endl
1959  << RM3(" SUGAR-BASE BOND ANGLE DISTANCE (A) : ", 7, 3) << Ff(ls_restr, key("type") == "n_sugar_bond_angle_d", "dev_ideal") << " ; "
1960  << Ff(ls_restr, key("type") == "n_sugar_bond_angle_d", "dev_ideal_target") << std::endl
1961  << RM3(" PHOSPHATE BONDS DISTANCE (A) : ", 7, 3) << Ff(ls_restr, key("type") == "n_phos_bond_d", "dev_ideal") << " ; "
1962  << Ff(ls_restr, key("type") == "n_phos_bond_d", "dev_ideal_target") << std::endl
1963  << RM3(" PHOSPHATE BOND ANGLE, H-BOND (A) : ", 7, 3) << Ff(ls_restr, key("type") == "n_phos_bond_angle_d", "dev_ideal") << " ; "
1964  << Ff(ls_restr, key("type") == "n_phos_bond_angle_d", "dev_ideal_target") << std::endl
1965 
1966  << RM3("") << std::endl
1967  << RM3(" PLANE RESTRAINT (A) : ", 7, 3) << Ff(ls_restr, key("type") == "n_plane_restr", "dev_ideal") << " ; "
1968  << Ff(ls_restr, key("type") == "n_plane_restr", "dev_ideal_target") << std::endl
1969  << RM3(" CHIRAL-CENTER RESTRAINT (A**3) : ", 7, 3) << Ff(ls_restr, key("type") == "n_chiral_restr", "dev_ideal") << " ; "
1970  << Ff(ls_restr, key("type") == "n_chiral_restr", "dev_ideal_target") << std::endl
1971 
1972  << RM3("") << std::endl
1973  << RM3(" NON-BONDED CONTACT RESTRAINTS.") << std::endl
1974  << RM3(" SINGLE TORSION CONTACT (A) : ", 7, 3) << Ff(ls_restr, key("type") == "n_singtor_nbd", "dev_ideal") << " ; "
1975  << Ff(ls_restr, key("type") == "n_singtor_nbd", "dev_ideal_target") << std::endl
1976  << RM3(" MULTIPLE TORSION CONTACT (A) : ", 7, 3) << Ff(ls_restr, key("type") == "n_multtor_nbd", "dev_ideal") << " ; "
1977  << Ff(ls_restr, key("type") == "n_multtor_nbd", "dev_ideal_target") << std::endl
1978 
1979  << RM3("") << std::endl
1980  << RM3(" ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA") << std::endl
1981  << RM3(" SUGAR-BASE BONDS (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "n_sugar_bond_it", "dev_ideal") << " ; "
1982  << Ff(ls_restr, key("type") == "n_sugar_bond_it", "dev_ideal_target") << std::endl
1983  << RM3(" SUGAR-BASE ANGLES (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "n_sugar_angle_it", "dev_ideal") << " ; "
1984  << Ff(ls_restr, key("type") == "n_sugar_angle_it", "dev_ideal_target") << std::endl
1985  << RM3(" PHOSPHATE BONDS (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "n_phos_bond_it", "dev_ideal") << " ; "
1986  << Ff(ls_restr, key("type") == "n_phos_bond_it", "dev_ideal_target") << std::endl
1987  << RM3(" PHOSPHATE BOND ANGLE, H-BOND (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "n_phos_angle_it", "dev_ideal") << " ; "
1988  << Ff(ls_restr, key("type") == "n_phos_angle_it", "dev_ideal_target") << std::endl
1989 
1990  << RM3("") << std::endl;
1991 }
1992 
1993 void WriteRemark3ProlSQ(std::ostream &pdbFile, const datablock &db)
1994 {
1995  auto refine = db["refine"].front();
1996  auto pdbx_refine = db["pdbx_refine"].front();
1997  auto hist = db["refine_hist"].front();
1998  auto reflns = db["reflns"].front();
1999  auto analyze = db["refine_analyze"].front();
2000  auto &ls_restr = db["refine_ls_restr"];
2001 
2002  pdbFile << RM3("") << std::endl
2003  << RM3(" DATA USED IN REFINEMENT.") << std::endl
2004 
2005  << RM3(" RESOLUTION RANGE HIGH (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_high") << std::endl
2006  << RM3(" RESOLUTION RANGE LOW (ANGSTROMS) : ", 5, 2) << Ff(refine, "ls_d_res_low") << std::endl
2007  << RM3(" DATA CUTOFF (SIGMA(F)) : ", 6, 3) << Ff(refine, "pdbx_ls_sigma_F") << std::endl
2008  << RM3(" COMPLETENESS FOR RANGE (%) : ", 5, 2) << Ff(refine, "ls_percent_reflns_obs") << std::endl
2009  << RM3(" NUMBER OF REFLECTIONS : ", 12, 6) << Fi(refine, "ls_number_reflns_obs") << std::endl
2010 
2011  << RM3("") << std::endl
2012  << RM3(" FIT TO DATA USED IN REFINEMENT.") << std::endl
2013  << RM3(" CROSS-VALIDATION METHOD : ") << Fs(refine, "pdbx_ls_cross_valid_method") << std::endl
2014  << RM3(" FREE R VALUE TEST SET SELECTION : ") << Fs(refine, "pdbx_R_Free_selection_details") << std::endl
2015  << RM3(" R VALUE (WORKING + TEST SET) : ", 7, 3) << Ff(refine, "ls_R_factor_obs") << std::endl
2016  << RM3(" R VALUE (WORKING SET) : ", 7, 3) << Ff(refine, "ls_R_factor_R_work") << std::endl
2017  << RM3(" FREE R VALUE : ", 7, 3) << Ff(refine, "ls_R_factor_R_free") << std::endl
2018  << RM3(" FREE R VALUE TEST SET SIZE (%) : ", 7, 3) << Ff(refine, "ls_percent_reflns_R_free") << std::endl
2019  << RM3(" FREE R VALUE TEST SET COUNT : ", 12, 6) << Fi(refine, "ls_number_reflns_R_free") << std::endl
2020 
2021  << RM3("") << std::endl
2022  << RM3(" FIT/AGREEMENT OF MODEL WITH ALL DATA.") << std::endl
2023  << RM3(" R VALUE (WORKING + TEST SET, NO CUTOFF) : ") << Fs(refine, "ls_R_factor_all") << std::endl
2024  << RM3(" R VALUE (WORKING SET, NO CUTOFF) : ") << Fs(pdbx_refine, "R_factor_obs_no_cutoff") << std::endl
2025 
2026  << RM3(" FREE R VALUE (NO CUTOFF) : ") << Fs(pdbx_refine, "free_R_factor_no_cutoff") << std::endl
2027  << RM3(" FREE R VALUE TEST SET SIZE (%, NO CUTOFF) : ") << Fs(pdbx_refine, "free_R_val_test_set_size_perc_no_cutoff") << std::endl
2028  << RM3(" FREE R VALUE TEST SET COUNT (NO CUTOFF) : ") << Fs(pdbx_refine, "free_R_val_test_set_ct_no_cutoff") << std::endl
2029  << RM3(" TOTAL NUMBER OF REFLECTIONS (NO CUTOFF) : ") << Fs(refine, "ls_number_reflns_all") << std::endl
2030 
2031  << RM3("") << std::endl
2032  << RM3(" NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.") << std::endl
2033  << RM3(" PROTEIN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_protein") << std::endl
2034  << RM3(" NUCLEIC ACID ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_nucleic_acid") << std::endl
2035  << RM3(" HETEROGEN ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_ligand") << std::endl
2036  << RM3(" SOLVENT ATOMS : ", 12, 6) << Fi(hist, "number_atoms_solvent") << std::endl
2037  // << RM3(" ALL ATOMS : ", 12, 6) << Fi(hist, "pdbx_number_atoms_protein") << std::endl
2038 
2039  << RM3("") << std::endl
2040  << RM3(" B VALUES.") << std::endl
2041  // << RM3(" B VALUE TYPE : ", 7, 2) << Fs(refine, "pdbx_TLS_residual_ADP_flag") << std::endl
2042  << RM3(" FROM WILSON PLOT (A**2) : ", 7, 2) << Ff(reflns, "B_iso_Wilson_estimate") << std::endl
2043  << RM3(" MEAN B VALUE (OVERALL, A**2) : ", 7, 2) << Ff(refine, "B_iso_mean") << std::endl
2044  << RM3(" OVERALL ANISOTROPIC B VALUE.") << std::endl
2045  << RM3(" B11 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][1]") << std::endl
2046  << RM3(" B22 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][2]") << std::endl
2047  << RM3(" B33 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[3][3]") << std::endl
2048  << RM3(" B12 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][2]") << std::endl
2049  << RM3(" B13 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[1][3]") << std::endl
2050  << RM3(" B23 (A**2) : ", -7, 2) << Ff(refine, "aniso_B[2][3]") << std::endl
2051 
2052  << RM3("") << std::endl
2053  << RM3(" ESTIMATED COORDINATE ERROR.") << std::endl
2054  << RM3(" ESD FROM LUZZATI PLOT (A) : ", 7, 2) << Ff(analyze, "Luzzati_coordinate_error_obs") << std::endl
2055  << RM3(" ESD FROM SIGMAA (A) : ", 7, 2) << Ff(analyze, "Luzzati_sigma_a_obs") << std::endl
2056  << RM3(" LOW RESOLUTION CUTOFF (A) : ", 7, 2) << Ff(analyze, "Luzzati_d_res_low_obs") << std::endl
2057 
2058  << RM3("") << std::endl
2059  << RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << std::endl
2060  << RM3(" DISTANCE RESTRAINTS. RMS SIGMA") << std::endl
2061  << RM3(" BOND LENGTH (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_bond_d", "dev_ideal") << " ; "
2062  << Ff(ls_restr, key("type") == "p_bond_d", "dev_ideal_target") << std::endl
2063  << RM3(" ANGLE DISTANCE (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_angle_d", "dev_ideal") << " ; "
2064  << Ff(ls_restr, key("type") == "p_angle_d", "dev_ideal_target") << std::endl
2065  << RM3(" INTRAPLANAR 1-4 DISTANCE (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_planar_d", "dev_ideal") << " ; "
2066  << Ff(ls_restr, key("type") == "p_planar_d", "dev_ideal_target") << std::endl
2067  << RM3(" H-BOND OR METAL COORDINATION (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_hb_or_metal_coord", "dev_ideal") << " ; "
2068  << Ff(ls_restr, key("type") == "p_hb_or_metal_coord", "dev_ideal_target") << std::endl
2069 
2070  << RM3("") << std::endl
2071  << RM3(" PLANE RESTRAINT (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_plane_restr", "dev_ideal") << " ; "
2072  << Ff(ls_restr, key("type") == "p_plane_restr", "dev_ideal_target") << std::endl
2073  << RM3(" CHIRAL-CENTER RESTRAINT (A**3) : ", 7, 3) << Ff(ls_restr, key("type") == "p_chiral_restr", "dev_ideal") << " ; "
2074  << Ff(ls_restr, key("type") == "p_chiral_restr", "dev_ideal_target") << std::endl
2075 
2076  << RM3("") << std::endl
2077  << RM3(" NON-BONDED CONTACT RESTRAINTS.") << std::endl
2078  << RM3(" SINGLE TORSION (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_singtor_nbd", "dev_ideal") << " ; "
2079  << Ff(ls_restr, key("type") == "p_singtor_nbd", "dev_ideal_target") << std::endl
2080  << RM3(" MULTIPLE TORSION (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_multtor_nbd", "dev_ideal") << " ; "
2081  << Ff(ls_restr, key("type") == "p_multtor_nbd", "dev_ideal_target") << std::endl
2082  << RM3(" H-BOND (X...Y) (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_xyhbond_nbd", "dev_ideal") << " ; "
2083  << Ff(ls_restr, key("type") == "p_xyhbond_nbd", "dev_ideal_target") << std::endl
2084  << RM3(" H-BOND (X-H...Y) (A) : ", 7, 3) << Ff(ls_restr, key("type") == "p_xhyhbond_nbd", "dev_ideal") << " ; "
2085  << Ff(ls_restr, key("type") == "p_xhyhbond_nbd", "dev_ideal_target") << std::endl
2086 
2087  << RM3("") << std::endl
2088  << RM3(" CONFORMATIONAL TORSION ANGLE RESTRAINTS.") << std::endl
2089  << RM3(" SPECIFIED (DEGREES) : ", 7, 3) << Ff(ls_restr, key("type") == "p_special_tor", "dev_ideal") << " ; "
2090  << Ff(ls_restr, key("type") == "p_special_tor", "dev_ideal_target") << std::endl
2091  << RM3(" PLANAR (DEGREES) : ", 7, 3) << Ff(ls_restr, key("type") == "p_planar_tor", "dev_ideal") << " ; "
2092  << Ff(ls_restr, key("type") == "p_planar_tor", "dev_ideal_target") << std::endl
2093  << RM3(" STAGGERED (DEGREES) : ", 7, 3) << Ff(ls_restr, key("type") == "p_staggered_tor", "dev_ideal") << " ; "
2094  << Ff(ls_restr, key("type") == "p_staggered_tor", "dev_ideal_target") << std::endl
2095  << RM3(" TRANSVERSE (DEGREES) : ", 7, 3) << Ff(ls_restr, key("type") == "p_transverse_tor", "dev_ideal") << " ; "
2096  << Ff(ls_restr, key("type") == "p_transverse_tor", "dev_ideal_target") << std::endl
2097 
2098  << RM3("") << std::endl
2099  << RM3(" ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA") << std::endl
2100  << RM3(" MAIN-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "p_mcbond_it", "dev_ideal") << " ; "
2101  << Ff(ls_restr, key("type") == "p_mcbond_it", "dev_ideal_target") << std::endl
2102  << RM3(" MAIN-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "p_mcangle_it", "dev_ideal") << " ; "
2103  << Ff(ls_restr, key("type") == "p_mcangle_it", "dev_ideal_target") << std::endl
2104  << RM3(" SIDE-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "p_scbond_it", "dev_ideal") << " ; "
2105  << Ff(ls_restr, key("type") == "p_scbond_it", "dev_ideal_target") << std::endl
2106  << RM3(" SIDE-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, key("type") == "p_scangle_it", "dev_ideal") << " ; "
2107  << Ff(ls_restr, key("type") == "p_scangle_it", "dev_ideal_target") << std::endl
2108 
2109  << RM3("") << std::endl;
2110 }
2111 
2112 void WriteRemark3(std::ostream &pdbFile, const datablock &db)
2113 {
2114  std::string program, authors;
2115 
2116  if (not db["pdbx_nmr_software"].empty())
2117  {
2118  auto software = db["pdbx_nmr_software"].find(key("classification") == "refinement");
2119  if (software.size() == 1)
2120  cif::tie(program, authors) = software.front().get("name", "authors");
2121  else if (software.size() > 1)
2122  {
2123  for (auto r : software)
2124  {
2125  if (program.empty() == false)
2126  {
2127  program += ", ";
2128  authors += ", ";
2129  }
2130 
2131  program += r["name"].as<std::string>();
2132  authors += r["authors"].as<std::string>() + " (" + r["name"].as<std::string>() + ")";
2133  }
2134  }
2135  }
2136 
2137  if (program.empty())
2138  program = cifSoftware(db, eRefinement);
2139 
2140  if (authors.empty())
2141  authors = "NULL";
2142 
2143  if (not program.empty())
2144  {
2145  pdbFile << RM3("") << std::endl
2146  << RM3("REFINEMENT.") << std::endl;
2147 
2148  int l = 0;
2149  for (auto s : word_wrap(program, 52))
2150  pdbFile << RM3(++l == 1 ? " PROGRAM : " : " ") << s << std::endl;
2151 
2152  l = 0;
2153  for (auto s : word_wrap(authors, 52))
2154  pdbFile << RM3(++l == 1 ? " AUTHORS : " : " ") << s << std::endl;
2155  }
2156 
2157  if (not db["refine"].empty())
2158  {
2159  auto s = program.find(' ');
2160  if (s != std::string::npos)
2161  program.erase(s, std::string::npos);
2162 
2163  if (iequals(program, "BUSTER") or iequals(program, "BUSTER-TNT") or iequals(program, "TNT"))
2164  WriteRemark3BusterTNT(pdbFile, db);
2165  else if (iequals(program, "CNS") or iequals(program, "CNX"))
2166  WriteRemark3CNS(pdbFile, db);
2167  else if (iequals(program, "X-PLOR"))
2168  WriteRemark3XPlor(pdbFile, db);
2169  else if (iequals(program, "REFMAC"))
2170  WriteRemark3Refmac(pdbFile, db);
2171  else if (iequals(program, "SHELXL"))
2172  WriteRemark3Shelxl(pdbFile, db);
2173  else if (iequals(program, "PHENIX"))
2174  WriteRemark3Phenix(pdbFile, db);
2175  else if (iequals(program, "NUCLSQ"))
2176  WriteRemark3NuclSQ(pdbFile, db);
2177  else if (iequals(program, "PROLSQ"))
2178  WriteRemark3ProlSQ(pdbFile, db);
2179  }
2180 
2181  for (auto r : db["refine"])
2182  {
2183  std::string remarks = r["details"].as<std::string>();
2184  if (remarks.empty())
2185  remarks = "NULL";
2186 
2187  WriteOneContinuedLine(pdbFile, "REMARK 3 ", 0, "OTHER REFINEMENT REMARKS: " + remarks);
2188  break;
2189  }
2190 }
2191 
2192 void WriteRemark200(std::ostream &pdbFile, const datablock &db)
2193 {
2194  typedef RM<200> RM;
2195 
2196  try
2197  {
2198  for (auto diffrn : db["diffrn"])
2199  {
2200  std::string diffrn_id = diffrn["id"].as<std::string>();
2201  std::string crystal_id = diffrn["crystal_id"].as<std::string>();
2202 
2203  auto diffrn_radiation = db["diffrn_radiation"].find_first(key("diffrn_id") == diffrn_id);
2204  auto diffrn_radiation_wavelength = db["diffrn_radiation_wavelength"].find_first(key("id") == diffrn_radiation["wavelength_id"].as<std::string>());
2205  auto diffrn_source = db["diffrn_source"].find_first(key("diffrn_id") == diffrn_id);
2206  auto diffrn_detector = db["diffrn_detector"].find_first(key("diffrn_id") == diffrn_id);
2207  auto exptl = db["exptl"].find_first(key("entry_id") == db.name());
2208  auto exptl_crystal = db["exptl_crystal"].find_first(key("id") == crystal_id);
2209  auto exptl_crystal_grow = db["exptl_crystal_grow"].find_first(key("crystal_id") == crystal_id);
2210  auto computing = db["computing"].find_first(key("entry_id") == db.name());
2211  auto reflns = db["reflns"].find_first(key("entry_id") == db.name());
2212 
2213  std::string pdbx_diffrn_id = reflns["pdbx_diffrn_id"].as<std::string>();
2214 
2215  auto reflns_shell = db["reflns_shell"].find_first(key("pdbx_diffrn_id") == pdbx_diffrn_id);
2216  auto refine = db["refine"].find_first(key("pdbx_diffrn_id") == pdbx_diffrn_id);
2217 
2218  std::string date =
2219  diffrn_detector.empty() ? "NULL" : cif2pdbDate(diffrn_detector["pdbx_collection_date"].as<std::string>());
2220 
2221  std::string iis = cifSoftware(db, eDataReduction);
2222  std::string dss = cifSoftware(db, eDataScaling);
2223 
2224  std::string source = diffrn_source["source"].as<std::string>();
2225  std::string synchrotron, type;
2226 
2227  if (source.empty())
2228  synchrotron = "NULL";
2229  else if (iequals(source, "SYNCHROTRON"))
2230  {
2231  synchrotron = "Y";
2232  source = diffrn_source["pdbx_synchrotron_site"].as<std::string>();
2233  if (source.empty())
2234  source = "NULL";
2235  type = "NULL";
2236  }
2237  else
2238  {
2239  synchrotron = "N";
2240  type = diffrn_source["type"].as<std::string>();
2241  if (type.empty())
2242  type = "NULL";
2243  }
2244 
2245  if (source.empty())
2246  source = "NULL";
2247  if (type.empty())
2248  type = "NULL";
2249 
2250  pdbFile << RM("") << std::endl
2251  << RM("EXPERIMENTAL DETAILS") << std::endl
2252  << RM(" EXPERIMENT TYPE : ") << Fs(exptl, "method") << std::endl
2253  << RM(" DATE OF DATA COLLECTION : ") << date << std::endl
2254  << RM(" TEMPERATURE (KELVIN) : ", 5, 1) << Ff(diffrn, "ambient_temp") << std::endl
2255  << RM(" PH : ", 4, 1) << Ff(exptl_crystal_grow, "ph") << std::endl
2256  << RM(" NUMBER OF CRYSTALS USED : ") << Fi(exptl, "crystals_number") << std::endl
2257  << RM("") << std::endl
2258  << RM(" SYNCHROTRON (Y/N) : ") << synchrotron << std::endl
2259  << RM(" RADIATION SOURCE : ") << source << std::endl
2260  << RM(" BEAMLINE : ") << Fs(diffrn_source, "pdbx_synchrotron_beamline") << std::endl
2261  << RM(" X-RAY GENERATOR MODEL : ") << type << std::endl
2262  << RM(" MONOCHROMATIC OR LAUE (M/L) : ") << Fs(diffrn_radiation, "pdbx_monochromatic_or_laue_m_l") << std::endl
2263  << RM(" WAVELENGTH OR RANGE (A) : ", 7, 4) << Ff(diffrn_radiation_wavelength, "wavelength") << std::endl
2264  << RM(" MONOCHROMATOR : ") << Fs(diffrn_radiation, "monochromator") << std::endl
2265  << RM(" OPTICS : ") << Fs(diffrn_detector, "details") << std::endl
2266  << RM("") << std::endl
2267  << RM(" DETECTOR TYPE : ") << Fs(diffrn_detector, "detector") << std::endl
2268  << RM(" DETECTOR MANUFACTURER : ") << Fs(diffrn_detector, "type") << std::endl
2269  << RM(" INTENSITY-INTEGRATION SOFTWARE : ") << iis << std::endl
2270  << RM(" DATA SCALING SOFTWARE : ") << dss << std::endl
2271  << RM(" ") << std::endl
2272  << RM(" NUMBER OF UNIQUE REFLECTIONS : ") << Fi(reflns, "number_obs") << std::endl
2273  << RM(" RESOLUTION RANGE HIGH (A) : ", 7, 3) << Ff(reflns, "d_resolution_high") << std::endl
2274  << RM(" RESOLUTION RANGE LOW (A) : ", 7, 3) << Ff(reflns, "d_resolution_low") << std::endl
2275  << RM(" REJECTION CRITERIA (SIGMA(I)) : ", 7, 3) << Ff(reflns, "observed_criterion_sigma_I") << std::endl
2276  << RM("") << std::endl
2277  << RM("OVERALL.") << std::endl
2278  << RM(" COMPLETENESS FOR RANGE (%) : ", 7, 1) << Ff(reflns, "percent_possible_obs") << std::endl
2279  << RM(" DATA REDUNDANCY : ", 7, 3) << Ff(reflns, "pdbx_redundancy") << std::endl
2280  << RM(" R MERGE (I) : ", 7, 5) << Ff(reflns, "pdbx_Rmerge_I_obs") << std::endl
2281  << RM(" R SYM (I) : ", 7, 5) << Ff(reflns, "pdbx_Rsym_value") << std::endl
2282  << RM(" <I/SIGMA(I)> FOR THE DATA SET : ", 7, 4) << Ff(reflns, "pdbx_netI_over_sigmaI") << std::endl
2283  << RM("") << std::endl
2284  << RM("IN THE HIGHEST RESOLUTION SHELL.") << std::endl
2285  << RM(" HIGHEST RESOLUTION SHELL, RANGE HIGH (A) : ", 7, 2) << Ff(reflns_shell, "d_res_high") << std::endl
2286  << RM(" HIGHEST RESOLUTION SHELL, RANGE LOW (A) : ", 7, 2) << Ff(reflns_shell, "d_res_low") << std::endl
2287  << RM(" COMPLETENESS FOR SHELL (%) : ", 7, 1) << Ff(reflns_shell, "percent_possible_all") << std::endl
2288  << RM(" DATA REDUNDANCY IN SHELL : ", 7, 2) << Ff(reflns_shell, "pdbx_redundancy") << std::endl
2289  << RM(" R MERGE FOR SHELL (I) : ", 7, 5) << Ff(reflns_shell, "Rmerge_I_obs") << std::endl
2290  << RM(" R SYM FOR SHELL (I) : ", 7, 5) << Ff(reflns_shell, "pdbx_Rsym_value") << std::endl
2291  << RM(" <I/SIGMA(I)> FOR SHELL : ", 7, 3) << Ff(reflns_shell, "meanI_over_sigI_obs") << std::endl
2292  << RM("") << std::endl;
2293 
2294  struct
2295  {
2296  row_handle r;
2297  const char *field;
2298  const char *dst;
2299  } kTail[] = {
2300  { diffrn_radiation, "pdbx_diffrn_protocol", "DIFFRACTION PROTOCOL: " },
2301  { refine, "pdbx_method_to_determine_struct", "METHOD USED TO DETERMINE THE STRUCTURE: " },
2302  { computing, "structure_solution", "SOFTWARE USED: " },
2303  { refine, "pdbx_starting_model", "STARTING MODEL: " },
2304  { exptl_crystal, "description", "\nREMARK: " }
2305  };
2306 
2307  for (auto &t : kTail)
2308  {
2309  std::string s = t.r[t.field].as<std::string>();
2310 
2311  if (s.empty())
2312  {
2313  if (strcmp(t.field, "structure_solution") == 0)
2314  s = cifSoftware(db, ePhasing);
2315  else
2316  s = "NULL";
2317  }
2318 
2319  WriteOneContinuedLine(pdbFile, "REMARK 200", 0, t.dst + s);
2320  }
2321 
2322  break;
2323  }
2324  }
2325  catch (const std::exception &ex)
2326  {
2327  if (VERBOSE >= 0)
2328  std::cerr << ex.what() << std::endl;
2329  }
2330 }
2331 
2332 void WriteRemark280(std::ostream &pdbFile, const datablock &db)
2333 {
2334  typedef RM<280> RM;
2335 
2336  try
2337  {
2338  for (auto exptl_crystal : db["exptl_crystal"])
2339  {
2340  std::string crystal_id = exptl_crystal["id"].as<std::string>();
2341  auto exptl_crystal_grow = db["exptl_crystal_grow"].find_first(key("crystal_id") == crystal_id);
2342 
2343  pdbFile
2344  << RM("") << std::endl
2345  << RM("CRYSTAL") << std::endl
2346  << RM("SOLVENT CONTENT, VS (%): ", 6, 2) << Ff(exptl_crystal, "density_percent_sol") << std::endl
2347  << RM("MATTHEWS COEFFICIENT, VM (ANGSTROMS**3/DA): ", 6, 2) << Ff(exptl_crystal, "density_Matthews") << std::endl
2348  << RM("") << std::endl;
2349 
2350  std::vector<std::string> conditions;
2351  auto add = [&conditions](const std::string c)
2352  {
2353  if (find(conditions.begin(), conditions.end(), c) == conditions.end())
2354  conditions.push_back(c);
2355  };
2356 
2357  const char *keys[] = { "pdbx_details", "ph", "method", "temp" };
2358 
2359  for (size_t i = 0; i < (sizeof(keys) / sizeof(const char *)); ++i)
2360  {
2361  const char *c = keys[i];
2362 
2363  std::string v = exptl_crystal_grow[c].as<std::string>();
2364  if (not v.empty())
2365  {
2366  to_upper(v);
2367 
2368  switch (i)
2369  {
2370  case 1: add("PH " + v); break;
2371  case 3: add("TEMPERATURE " + v + "K"); break;
2372 
2373  default:
2374  for (std::string::size_type b = 0, e = v.find(", "); b != std::string::npos; b = (e == std::string::npos ? e : e + 2), e = v.find(", ", b))
2375  add(v.substr(b, e - b));
2376  break;
2377  }
2378  }
2379  }
2380 
2381  WriteOneContinuedLine(pdbFile, "REMARK 280", 0, "CRYSTALLIZATION CONDITIONS: " + (conditions.empty() ? "NULL" : join(conditions, ", ")));
2382 
2383  break;
2384  }
2385  }
2386  catch (const std::exception &ex)
2387  {
2388  if (VERBOSE >= 0)
2389  std::cerr << ex.what() << std::endl;
2390  }
2391 }
2392 
2393 void WriteRemark350(std::ostream &pdbFile, const datablock &db)
2394 {
2395  auto &c1 = db["pdbx_struct_assembly"];
2396  if (c1.empty())
2397  return;
2398 
2399  std::vector<std::string> biomolecules, details;
2400  for (auto bm : c1)
2401  {
2402  std::string id = bm["id"].as<std::string>();
2403  biomolecules.push_back(id);
2404 
2405  for (auto r : db["struct_biol"].find(key("id") == id))
2406  {
2407  std::string s = r["details"].as<std::string>();
2408  if (not s.empty())
2409  details.push_back(s);
2410  }
2411  }
2412 
2413  // write out the mandatory REMARK 300 first
2414 
2415  pdbFile << RM<300>("") << std::endl
2416  << RM<300>("BIOMOLECULE: ") << join(biomolecules, ", ") << std::endl
2417  << RM<300>("SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM") << std::endl
2418  << RM<300>("GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN") << std::endl
2419  << RM<300>("THIS ENTRY. THE REMARK MAY ALSO PROVIDE INFORMATION ON") << std::endl
2420  << RM<300>("BURIED SURFACE AREA.") << std::endl;
2421 
2422  if (not details.empty())
2423  {
2424  pdbFile << RM<300>("REMARK:") << std::endl;
2425 
2426  for (auto detail : details)
2427  WriteOneContinuedLine(pdbFile, "REMARK 300", 0, detail);
2428  }
2429 
2430  typedef RM<350> RM;
2431 
2432  pdbFile << RM("") << std::endl
2433  << RM("COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN") << std::endl
2434  << RM("BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE") << std::endl
2435  << RM("MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS") << std::endl
2436  << RM("GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND") << std::endl
2437  << RM("CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN.") << std::endl;
2438 
2439  for (auto bm : c1)
2440  {
2441  std::string id, detail, method, oligomer;
2442  cif::tie(id, detail, method, oligomer) = bm.get("id", "details", "method_details", "oligomeric_details");
2443 
2444  pdbFile << RM("") << std::endl
2445  << RM("BIOMOLECULE: ") << id << std::endl;
2446 
2447  to_upper(oligomer);
2448 
2449  if (detail == "author_defined_assembly" or detail == "author_and_software_defined_assembly")
2450  pdbFile << RM("AUTHOR DETERMINED BIOLOGICAL UNIT: ") << oligomer << std::endl;
2451 
2452  if (detail == "software_defined_assembly" or detail == "author_and_software_defined_assembly")
2453  pdbFile << RM("SOFTWARE DETERMINED QUATERNARY STRUCTURE: ") << oligomer << std::endl;
2454 
2455  if (not method.empty())
2456  pdbFile << RM("SOFTWARE USED: ") << method << std::endl;
2457 
2458  for (std::string type : { "ABSA (A^2)", "SSA (A^2)", "MORE" })
2459  {
2460  for (auto prop : db["pdbx_struct_assembly_prop"].find(key("biol_id") == id and key("type") == type))
2461  {
2462  std::string value = prop["value"].as<std::string>();
2463 
2464  if (iequals(type, "ABSA (A^2)"))
2465  pdbFile << RM("TOTAL BURIED SURFACE AREA: ") << value << " ANGSTROM**2" << std::endl;
2466  else if (iequals(type, "SSA (A^2)"))
2467  pdbFile << RM("SURFACE AREA OF THE COMPLEX: ") << value << " ANGSTROM**2" << std::endl;
2468  else if (iequals(type, "MORE"))
2469  pdbFile << RM("CHANGE IN SOLVENT FREE ENERGY: ") << value << " KCAL/MOL" << std::endl;
2470  }
2471  }
2472 
2473  auto gen = db["pdbx_struct_assembly_gen"].find_first(key("assembly_id") == id);
2474 
2475  if (gen)
2476  {
2477  std::string asym_id_list, oper_id_list;
2478  cif::tie(asym_id_list, oper_id_list) = gen.get("asym_id_list", "oper_expression");
2479 
2480  auto asyms = split<std::string>(asym_id_list, ",");
2481 
2482  std::vector<std::string> chains = MapAsymIDs2ChainIDs(asyms, db);
2483  pdbFile << RM("APPLY THE FOLLOWING TO CHAINS: ") << join(chains, ", ") << std::endl;
2484 
2485  for (auto oper_id : split<std::string>(oper_id_list, ",", true))
2486  {
2487  auto r = db["pdbx_struct_oper_list"].find_first(key("id") == oper_id);
2488 
2489  pdbFile << RM(" BIOMT1 ", -3) << Fs(r, "id")
2490  << SEP(" ", -9, 6) << Ff(r, "matrix[1][1]")
2491  << SEP(" ", -9, 6) << Ff(r, "matrix[1][2]")
2492  << SEP(" ", -9, 6) << Ff(r, "matrix[1][3]")
2493  << SEP(" ", -14, 5) << Ff(r, "vector[1]")
2494  << std::endl
2495  << RM(" BIOMT2 ", -3) << Fs(r, "id")
2496  << SEP(" ", -9, 6) << Ff(r, "matrix[2][1]")
2497  << SEP(" ", -9, 6) << Ff(r, "matrix[2][2]")
2498  << SEP(" ", -9, 6) << Ff(r, "matrix[2][3]")
2499  << SEP(" ", -14, 5) << Ff(r, "vector[2]")
2500  << std::endl
2501  << RM(" BIOMT3 ", -3) << Fs(r, "id")
2502  << SEP(" ", -9, 6) << Ff(r, "matrix[3][1]")
2503  << SEP(" ", -9, 6) << Ff(r, "matrix[3][2]")
2504  << SEP(" ", -9, 6) << Ff(r, "matrix[3][3]")
2505  << SEP(" ", -14, 5) << Ff(r, "vector[3]")
2506  << std::endl;
2507  }
2508  }
2509  }
2510 }
2511 
2512 void WriteRemark400(std::ostream &pdbFile, const datablock &db)
2513 {
2514  for (auto r : db["pdbx_entry_details"])
2515  {
2516  std::string compound_details = r["compound_details"].as<std::string>();
2517  if (not compound_details.empty())
2518  WriteOneContinuedLine(pdbFile, "REMARK 400", 0, "\nCOMPOUND\n" + compound_details);
2519  }
2520 }
2521 
2522 void WriteRemark450(std::ostream &pdbFile, const datablock &db)
2523 {
2524  for (auto r : db["pdbx_entry_details"])
2525  {
2526  std::string source_details = r["source_details"].as<std::string>();
2527  if (not source_details.empty())
2528  WriteOneContinuedLine(pdbFile, "REMARK 450", 0, "\nSOURCE\n" + source_details, 11);
2529  break;
2530  }
2531 }
2532 
2533 void WriteRemark465(std::ostream &pdbFile, const datablock &db)
2534 {
2535  bool first = true;
2536  typedef RM<465> RM;
2537 
2538  auto &c = db["pdbx_unobs_or_zero_occ_residues"];
2539  std::vector<row_handle> missing(c.begin(), c.end());
2540  stable_sort(missing.begin(), missing.end(), [](row_handle a, row_handle b) -> bool
2541  {
2542  int modelNrA, seqIDA, modelNrB, seqIDB;
2543  std::string asymIDA, asymIDB;
2544 
2545  cif::tie(modelNrA, asymIDA, seqIDA) = a.get("PDB_model_num", "auth_asym_id", "auth_seq_id");
2546  cif::tie(modelNrB, asymIDB, seqIDB) = b.get("PDB_model_num", "auth_asym_id", "auth_seq_id");
2547 
2548  int d = modelNrA - modelNrB;
2549  if (d == 0)
2550  d = asymIDA.compare(asymIDB);
2551  if (d == 0)
2552  d = seqIDA - seqIDB;
2553 
2554  return d < 0; });
2555 
2556  for (auto r : missing)
2557  {
2558  if (first)
2559  {
2560  pdbFile << RM("") << std::endl
2561  << RM("MISSING RESIDUES") << std::endl
2562  << RM("THE FOLLOWING RESIDUES WERE NOT LOCATED IN THE") << std::endl
2563  << RM("EXPERIMENT. (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN") << std::endl
2564  << RM("IDENTIFIER; SSSEQ=SEQUENCE NUMBER; I=INSERTION CODE.)") << std::endl
2565  << RM("") << std::endl
2566  << RM(" M RES C SSSEQI") << std::endl;
2567  first = false;
2568  }
2569 
2570  std::string modelNr, resName, chainID, iCode;
2571  int seqNr;
2572 
2573  cif::tie(modelNr, resName, chainID, iCode, seqNr) =
2574  r.get("PDB_model_num", "auth_comp_id", "auth_asym_id", "PDB_ins_code", "auth_seq_id");
2575 
2576  pdbFile << cif::format("REMARK 465 %3.3s %3.3s %1.1s %5d%1.1s", modelNr, resName, chainID, seqNr, iCode) << std::endl;
2577  }
2578 }
2579 
2580 void WriteRemark470(std::ostream &pdbFile, const datablock &db)
2581 {
2582  typedef RM<470> RM;
2583 
2584  // wow...
2585  typedef std::tuple<std::string, std::string, int, std::string, std::string> key_type;
2586  std::map<key_type, std::deque<std::string>> data;
2587 
2588  for (auto r : db["pdbx_unobs_or_zero_occ_atoms"])
2589  {
2590  std::string modelNr, resName, chainID, iCode, atomID;
2591  int seqNr;
2592 
2593  cif::tie(modelNr, resName, chainID, iCode, seqNr, atomID) =
2594  r.get("PDB_model_num", "auth_comp_id", "auth_asym_id", "PDB_ins_code", "auth_seq_id", "auth_atom_id");
2595 
2596  key_type k{ modelNr, chainID, seqNr, iCode, resName };
2597 
2598  auto i = data.find(k);
2599  if (i == data.end())
2600  data[k] = std::deque<std::string>{ atomID };
2601  else
2602  i->second.push_back(atomID);
2603  }
2604 
2605  if (not data.empty())
2606  {
2607  pdbFile << RM("") << std::endl
2608  << RM("MISSING ATOM") << std::endl
2609  << RM("THE FOLLOWING RESIDUES HAVE MISSING ATOMS (M=MODEL NUMBER;") << std::endl
2610  << RM("RES=RESIDUE NAME; C=CHAIN IDENTIFIER; SSEQ=SEQUENCE NUMBER;") << std::endl
2611  << RM("I=INSERTION CODE):") << std::endl
2612  << RM(" M RES CSSEQI ATOMS") << std::endl;
2613 
2614  for (auto &a : data)
2615  {
2616  std::string modelNr, resName, chainID, iCode;
2617  int seqNr;
2618 
2619  std::tie(modelNr, chainID, seqNr, iCode, resName) = a.first;
2620 
2621  while (not a.second.empty())
2622  {
2623  pdbFile << cif::format("REMARK 470 %3.3s %3.3s %1.1s%4d%1.1s ", modelNr, resName, chainID, seqNr, iCode) << " ";
2624 
2625  for (size_t i = 0; i < 6 and not a.second.empty(); ++i)
2626  {
2627  pdbFile << cif2pdbAtomName(a.second.front(), resName, db) << ' ';
2628  a.second.pop_front();
2629  }
2630 
2631  pdbFile << std::endl;
2632  }
2633  }
2634  }
2635 }
2636 
// Placeholder for the REMARK 610 section. The body is empty: nothing is
// written to pdbFile and db is not read.
2637 void WriteRemark610(std::ostream &pdbFile, const datablock &db)
2638 {
2639  // #warning("unimplemented!");
2640 }
2641 
2642 void WriteRemark800(std::ostream &pdbFile, const datablock &db)
2643 {
2644  int nr = 0;
2645  for (auto r : db["struct_site"])
2646  {
2647  pdbFile << "REMARK 800" << std::endl;
2648  if (++nr == 1)
2649  {
2650  pdbFile << "REMARK 800 SITE" << std::endl;
2651  ++nr;
2652  }
2653 
2654  std::string ident, code, desc;
2655  cif::tie(ident, code, desc) = r.get("id", "pdbx_evidence_code", "details");
2656 
2657  to_upper(code);
2658 
2659  for (auto l : { "SITE_IDENTIFIER: " + ident, "EVIDENCE_CODE: " + code, "SITE_DESCRIPTION: " + desc })
2660  {
2661  for (auto s : word_wrap(l, 69))
2662  pdbFile << "REMARK 800 " << s << std::endl;
2663  };
2664  }
2665 }
2666 
2667 void WriteRemark999(std::ostream &pdbFile, const datablock &db)
2668 {
2669  for (auto r : db["pdbx_entry_details"])
2670  {
2671  std::string sequence_details = r["sequence_details"].as<std::string>();
2672  if (not sequence_details.empty())
2673  WriteOneContinuedLine(pdbFile, "REMARK 999", 0, "\nSEQUENCE\n" + sequence_details, 11);
2674  break;
2675  }
2676 }
2677 
// Write all REMARK sections to pdbFile by calling the individual
// WriteRemarkNNN functions in ascending remark-number order, each with the
// same stream and datablock.
2678 void WriteRemarks(std::ostream &pdbFile, const datablock &db)
2679 {
2680  WriteRemark1(pdbFile, db);
2681  WriteRemark2(pdbFile, db);
2682  WriteRemark3(pdbFile, db);
2683 
2684  WriteRemark200(pdbFile, db);
2685  WriteRemark280(pdbFile, db);
2686 
2687  WriteRemark350(pdbFile, db);
2688 
2689  WriteRemark400(pdbFile, db);
2690 
2691  WriteRemark465(pdbFile, db);
2692  WriteRemark470(pdbFile, db);
2693 
 // WriteRemark610 currently has an empty body, so this call writes nothing.
2694  WriteRemark610(pdbFile, db);
2695 
2696  WriteRemark800(pdbFile, db);
2697  WriteRemark999(pdbFile, db);
2698 }
2699 
2700 int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
2701 {
2702  int numSeq = 0;
2703 
2704  // DBREF
2705 
2706  for (auto r : db["struct_ref"])
2707  {
2708  std::string id, db_name, db_code;
2709  cif::tie(id, db_name, db_code) = r.get("id", "db_name", "db_code");
2710 
2711  for (auto r1 : db["struct_ref_seq"].find(key("ref_id") == id))
2712  {
2713  std::string idCode, chainID, insertBegin, insertEnd, dbAccession, dbinsBeg, dbinsEnd;
2714  std::string seqBegin, seqEnd, dbseqBegin, dbseqEnd;
2715 
2716  cif::tie(idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, dbAccession, dbseqBegin, dbinsBeg, dbseqEnd, dbinsEnd) = r1.get("pdbx_PDB_id_code", "pdbx_strand_id", "pdbx_auth_seq_align_beg", "pdbx_seq_align_beg_ins_code", "pdbx_auth_seq_align_end",
2717  "pdbx_seq_align_end_ins_code", "pdbx_db_accession", "db_align_beg", "pdbx_db_align_beg_ins_code", "db_align_end", "pdbx_db_align_end_ins_code");
2718 
2719  if (dbAccession.length() > 8 or db_code.length() > 12 or atoi(dbseqEnd.c_str()) >= 100000)
2720  pdbFile << cif::format(
2721  "DBREF1 %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s %-20.20s",
2722  idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, db_code)
2723  << std::endl
2724  << cif::format(
2725  "DBREF2 %4.4s %1.1s %-22.22s %10.10s %10.10s",
2726  idCode, chainID, dbAccession, dbseqBegin, dbseqEnd)
2727  << std::endl;
2728  else
2729  pdbFile << cif::format(
2730  "DBREF %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s %-8.8s %-12.12s %5.5s%1.1s %5.5s%1.1s",
2731  idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, dbAccession, db_code, dbseqBegin, dbinsBeg, dbseqEnd, dbinsEnd)
2732  << std::endl;
2733  }
2734  }
2735 
2736  // SEQADV
2737 
2738  for (auto r : db["struct_ref_seq_dif"])
2739  {
2740  std::string idCode, resName, chainID, seqNum, iCode, database, dbAccession, dbRes, dbSeq, conflict;
2741 
2742  cif::tie(idCode, resName, chainID, seqNum, iCode, database, dbAccession, dbRes, dbSeq, conflict) = r.get("pdbx_PDB_id_code", "mon_id", "pdbx_pdb_strand_id", "pdbx_auth_seq_num", "pdbx_pdb_ins_code",
2743  "pdbx_seq_db_name", "pdbx_seq_db_accession_code", "db_mon_id", "pdbx_seq_db_seq_num",
2744  "details");
2745 
2746  to_upper(conflict);
2747 
2748  pdbFile << cif::format(
2749  "SEQADV %4.4s %3.3s %1.1s %4.4s%1.1s %-4.4s %-9.9s %3.3s %5.5s %-21.21s",
2750  idCode, resName, chainID, seqNum, iCode, database, dbAccession, dbRes, dbSeq, conflict)
2751  .str()
2752  << std::endl;
2753  }
2754 
2755  // SEQRES
2756 
2757  std::map<char, std::vector<std::string>> seqres;
2758  std::map<char, int> seqresl;
2759  for (auto r : db["pdbx_poly_seq_scheme"])
2760  {
2761  std::string chainID, res;
2762  cif::tie(chainID, res) = r.get("pdb_strand_id", "mon_id");
2763  if (chainID.empty() or res.length() > 3 or res.length() < 1)
2764  throw std::runtime_error("invalid pdbx_poly_seq_scheme record, chain: " + chainID + " res: " + res);
2765  seqres[chainID[0]].push_back(std::string(3 - res.length(), ' ') + res);
2766  ++seqresl[chainID[0]];
2767  }
2768 
2769  for (auto &&[chainID, seq] : seqres)
2770  {
2771  int n = 1;
2772  while (seq.empty() == false)
2773  {
2774  auto t = seq.size();
2775  if (t > 13)
2776  t = 13;
2777 
2778  pdbFile << cif::format(
2779  "SEQRES %3d %1.1s %4d %-51.51s ",
2780  n++, std::string{ chainID }, seqresl[chainID], join(seq.begin(), seq.begin() + t, " "))
2781  << std::endl;
2782 
2783  ++numSeq;
2784 
2785  seq.erase(seq.begin(), seq.begin() + t);
2786  }
2787  }
2788 
2789  // MODRES
2790 
2791  for (auto r : db["pdbx_struct_mod_residue"])
2792  {
2793  std::string chainID, seqNum, resName, iCode, stdRes, comment;
2794 
2795  cif::tie(chainID, seqNum, resName, iCode, stdRes, comment) =
2796  r.get("auth_asym_id", "auth_seq_id", "auth_comp_id", "PDB_ins_code", "parent_comp_id", "details");
2797 
2798  pdbFile << cif::format(
2799  "MODRES %4.4s %3.3s %1.1s %4.4s%1.1s %3.3s %-41.41s",
2800  db.name(), resName, chainID, seqNum, iCode, stdRes, comment)
2801  .str()
2802  << std::endl;
2803  }
2804 
2805  return numSeq;
2806 }
2807 
2808 int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
2809 {
2810  int numHet = 0;
2811 
2812  std::string water_entity_id, water_comp_id;
2813  for (auto r : db["entity"].find(key("type") == std::string("water")))
2814  {
2815  water_entity_id = r["id"].as<std::string>();
2816  break;
2817  }
2818 
2819  std::map<std::string, std::string> het;
2820 
2821  for (auto r : db["chem_comp"])
2822  {
2823  std::string id, name, mon_nstd_flag;
2824  cif::tie(id, name, mon_nstd_flag) = r.get("id", "name", "mon_nstd_flag");
2825 
2826  if (mon_nstd_flag == "y")
2827  continue;
2828 
2829  het[id] = name;
2830  }
2831 
2832  for (auto r : db["pdbx_entity_nonpoly"])
2833  {
2834  std::string entity_id, name, comp_id;
2835  cif::tie(entity_id, name, comp_id) = r.get("entity_id", "name", "comp_id");
2836 
2837  if (entity_id == water_entity_id)
2838  water_comp_id = comp_id;
2839 
2840  if (het.count(comp_id) == 0)
2841  het[comp_id] = name;
2842  }
2843 
2844  struct HET
2845  {
2846  bool water;
2847  std::string hetID;
2848  char chainID;
2849  int seqNum;
2850  char iCode;
2851  int numHetAtoms;
2852  std::string text; // ignored
2853  };
2854  std::vector<HET> hets;
2855 
2856  // // construct component number map
2857  // map<int,int> component_nr;
2858  // std::string lChainID, lCompID, lICode;
2859  // int lSeqNum;
2860  //
2861  // for (auto r: db["atom_site"])
2862  // {
2863  // std::string chainID, compID, iCode;
2864  // int seqNum;
2865  //
2866  // tie(seqNum, comp_id, chain_id, iCode) =
2867  // r.get("auth_seq_id", "auth_comp_id", "auth_asym_id", "pdbx_PDB_ins_code");
2868  //
2869  // if (chainID != lChainID or compID != lCompID or seqNum != lSeqNum or iCode != lICode)
2870  //
2871  // }
2872 
2873  // count the HETATM's
2874  // for (auto r: db["atom_site"].find(key("group_PDB") == std::string("HETATM")))
2875  std::set<std::string> missingHetNames;
2876 
2877  for (auto r : db["atom_site"])
2878  {
2879  int seqNum;
2880  std::string entity_id, comp_id, chain_id, iCode, modelNr;
2881 
2882  cif::tie(entity_id, seqNum, comp_id, chain_id, iCode, modelNr) =
2883  r.get("label_entity_id", "auth_seq_id", "auth_comp_id", "auth_asym_id", "pdbx_PDB_ins_code", "pdbx_PDB_model_num");
2884 
2885  if (compound_factory::kAAMap.count(comp_id) or compound_factory::kBaseMap.count(comp_id))
2886  continue;
2887 
2888  if (chain_id.length() != 1)
2889  throw std::runtime_error("Cannot produce PDB file, auth_asym_id not valid");
2890 
2891  if (entity_id != water_entity_id and het.count(comp_id) == 0)
2892  missingHetNames.insert(comp_id);
2893 
2894  auto h = find_if(hets.begin(), hets.end(),
2895  [=](const HET &het) -> bool
2896  {
2897  return het.hetID == comp_id and het.chainID == chain_id[0] and het.seqNum == seqNum;
2898  });
2899 
2900  if (h == hets.end())
2901  {
2902  hets.push_back({ entity_id == water_entity_id, comp_id, chain_id[0], seqNum,
2903  (iCode.empty() ? ' ' : iCode[0]), 1 });
2904  }
2905  else
2906  h->numHetAtoms += 1;
2907  }
2908 
2909  if (VERBOSE > 1 and not missingHetNames.empty())
2910  std::cerr << "Missing het name(s) for " << join(missingHetNames, ", ") << std::endl;
2911 
2912  for (auto h : hets)
2913  {
2914  if (h.water)
2915  continue;
2916  pdbFile << cif::format("HET %3.3s %c%4d%c %5d", h.hetID, h.chainID, h.seqNum, h.iCode, h.numHetAtoms) << std::endl;
2917  ++numHet;
2918  }
2919 
2920  for (auto &&[id, name] : het)
2921  {
2922  if (id == water_comp_id)
2923  continue;
2924 
2925  to_upper(name);
2926 
2927  int c = 1;
2928 
2929  for (;;)
2930  {
2931  pdbFile << cif::format("HETNAM %2.2s %3.3s ", (c > 1 ? std::to_string(c) : std::string()), id);
2932  ++c;
2933 
2934  if (name.length() > 55)
2935  {
2936  bool done = false;
2937  for (auto e = name.begin() + 54; e != name.begin(); --e)
2938  {
2939  if (ispunct(*e))
2940  {
2941  pdbFile << std::string(name.begin(), e) << std::endl;
2942  name.erase(name.begin(), e);
2943  done = true;
2944  break;
2945  }
2946  }
2947 
2948  if (not done)
2949  {
2950  pdbFile << std::string(name.begin(), name.begin() + 55) << std::endl;
2951  name.erase(name.begin(), name.begin() + 55);
2952  }
2953 
2954  continue;
2955  }
2956 
2957  pdbFile << name << std::endl;
2958  break;
2959  }
2960  }
2961 
2962  for (auto &&[id, name] : het)
2963  {
2964  if (id == water_comp_id)
2965  continue;
2966 
2967  std::string syn = db["chem_comp"].find_first<std::string>(key("id") == id, "pdbx_synonyms");
2968  if (syn.empty())
2969  continue;
2970 
2971  WriteOneContinuedLine(pdbFile, "HETSYN", 4, id + ' ' + syn, 11);
2972  }
2973 
2974  // FORMUL
2975 
2976  std::vector<std::string> formulas;
2977 
2978  for (auto h : het)
2979  {
2980  std::string hetID = h.first;
2981  int componentNr = 0;
2982 
2983  std::string first_het_asym_id;
2984  for (auto p : db["pdbx_poly_seq_scheme"].find(key("mon_id") == hetID))
2985  {
2986  first_het_asym_id = p["asym_id"].as<std::string>();
2987  break;
2988  }
2989 
2990  if (first_het_asym_id.empty())
2991  {
2992  for (auto p : db["pdbx_nonpoly_scheme"].find(key("mon_id") == hetID))
2993  {
2994  first_het_asym_id = p["asym_id"].as<std::string>();
2995  break;
2996  }
2997  }
2998 
2999  if (not first_het_asym_id.empty())
3000  {
3001  for (auto a : db["struct_asym"])
3002  {
3003  ++componentNr;
3004  if (a["id"] == first_het_asym_id)
3005  break;
3006  }
3007  }
3008 
3009  auto nr = count_if(hets.begin(), hets.end(), [hetID](auto &h) -> bool
3010  { return h.hetID == hetID; });
3011 
3012  for (auto r : db["chem_comp"].find(key("id") == hetID))
3013  {
3014  std::string formula = r["formula"].as<std::string>();
3015  if (nr > 1)
3016  formula = std::to_string(nr) + '(' + formula + ')';
3017 
3018  int c = 1;
3019  for (;;)
3020  {
3021  std::stringstream fs;
3022 
3023  fs << cif::format("FORMUL %2d %3.3s %2.2s%c", componentNr, hetID, (c > 1 ? std::to_string(c) : std::string()), (hetID == water_comp_id ? '*' : ' '));
3024  ++c;
3025 
3026  if (formula.length() > 51)
3027  {
3028  bool done = false;
3029  for (auto e = formula.begin() + 50; e != formula.begin(); --e)
3030  {
3031  if (ispunct(*e))
3032  {
3033  pdbFile << std::string(formula.begin(), e) << std::endl;
3034  formula.erase(formula.begin(), e);
3035  done = true;
3036  break;
3037  }
3038  }
3039 
3040  if (not done)
3041  {
3042  pdbFile << std::string(formula.begin(), formula.begin() + 55) << std::endl;
3043  formula.erase(formula.begin(), formula.begin() + 55);
3044  }
3045 
3046  continue;
3047  }
3048 
3049  fs << formula << std::endl;
3050 
3051  formulas.push_back(fs.str());
3052  break;
3053  }
3054 
3055  break;
3056  }
3057  }
3058 
3059  sort(formulas.begin(), formulas.end(), [](const std::string &a, const std::string &b) -> bool
3060  { return stoi(a.substr(8, 2)) < stoi(b.substr(8, 2)); });
3061 
3062  for (auto &f : formulas)
3063  pdbFile << f;
3064 
3065  return numHet;
3066 }
3067 
// Write the secondary structure section: one HELIX record per struct_conf row
// whose conf_type_id is HELX_P, and SHEET records built from struct_sheet,
// struct_sheet_order, struct_sheet_range and pdbx_struct_sheet_hbond.
//
// Returns the tuple (numHelix, numSheet). numHelix is the number of HELIX
// lines written; numSheet is incremented once per struct_sheet_order row.
3068 std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const datablock &db)
3069 {
3070  int numHelix = 0, numSheet = 0;
3071 
3072  // HELIX
3073  for (auto r : db["struct_conf"].find(key("conf_type_id") == "HELX_P"))
3074  {
3075  std::string pdbx_PDB_helix_id, beg_label_comp_id, pdbx_beg_PDB_ins_code,
3076  end_label_comp_id, pdbx_end_PDB_ins_code, beg_auth_comp_id,
3077  beg_auth_asym_id, end_auth_comp_id, end_auth_asym_id, details;
3078  int pdbx_PDB_helix_class, pdbx_PDB_helix_length, beg_auth_seq_id, end_auth_seq_id;
3079 
3080  cif::tie(pdbx_PDB_helix_id, beg_label_comp_id, pdbx_beg_PDB_ins_code,
3081  end_label_comp_id, pdbx_end_PDB_ins_code, beg_auth_comp_id,
3082  beg_auth_asym_id, end_auth_comp_id, end_auth_asym_id, details,
3083  pdbx_PDB_helix_class, pdbx_PDB_helix_length, beg_auth_seq_id, end_auth_seq_id) =
3084  r.get("pdbx_PDB_helix_id", "beg_label_comp_id", "pdbx_beg_PDB_ins_code",
3085  "end_label_comp_id", "pdbx_end_PDB_ins_code", "beg_auth_comp_id",
3086  "beg_auth_asym_id", "end_auth_comp_id", "end_auth_asym_id", "details",
3087  "pdbx_PDB_helix_class", "pdbx_PDB_helix_length", "beg_auth_seq_id", "end_auth_seq_id");
3088 
 // The running counter numHelix is the serial number on the line; the
 // residue names written are the label comp ids, beg_auth_comp_id and
 // end_auth_comp_id are read but not used below.
3089  ++numHelix;
3090  pdbFile << cif::format("HELIX %3d %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s%2d%-30.30s %5d",
3091  numHelix, pdbx_PDB_helix_id, beg_label_comp_id, beg_auth_asym_id, beg_auth_seq_id, pdbx_beg_PDB_ins_code, end_label_comp_id, end_auth_asym_id, end_auth_seq_id, pdbx_end_PDB_ins_code, pdbx_PDB_helix_class, details, pdbx_PDB_helix_length)
3092  << std::endl;
3093  }
3094 
 // SHEET
3095  for (auto r : db["struct_sheet"])
3096  {
3097  std::string sheetID;
3098  int numStrands = 0;
3099 
3100  cif::tie(sheetID, numStrands) = r.get("id", "number_strands");
3101 
 // true until the first strand of this sheet has been written
3102  bool first = true;
3103 
3104  for (auto o : db["struct_sheet_order"].find(key("sheet_id") == sheetID))
3105  {
 // sense: -1 for anti-parallel, 1 for parallel, 0 for any other value
3106  int sense = 0;
3107  std::string s, rangeID1, rangeID2;
3108 
3109  cif::tie(s, rangeID1, rangeID2) = o.get("sense", "range_id_1", "range_id_2");
3110  if (s == "anti-parallel")
3111  sense = -1;
3112  else if (s == "parallel")
3113  sense = 1;
3114 
 // For the first order row of a sheet, range_id_1 is written as a strand
 // of its own, with sense 0 and without registration columns.
3115  if (first)
3116  {
3117  std::string initResName, initChainID, initICode, endResName, endChainID, endICode;
3118  int initSeqNum, endSeqNum;
3119 
3120  auto r1 = db["struct_sheet_range"].find_first(key("sheet_id") == sheetID and key("id") == rangeID1);
3121 
 // initResName and endResName each appear twice in the target list:
 // once for the label comp id and once for the auth comp id.
 // NOTE(review): which of the two values they hold afterwards depends
 // on the assignment order inside cif::tie -- confirm.
3122  cif::tie(initResName, initICode, endResName, endICode,
3123  initResName, initChainID, initSeqNum, endResName, endChainID, endSeqNum) = r1.get("beg_label_comp_id", "pdbx_beg_PDB_ins_code", "end_label_comp_id",
3124  "pdbx_end_PDB_ins_code", "beg_auth_comp_id", "beg_auth_asym_id", "beg_auth_seq_id",
3125  "end_auth_comp_id", "end_auth_asym_id", "end_auth_seq_id");
3126 
3127  pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID1, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, 0) << std::endl;
3128 
3129  first = false;
3130  }
3131 
3132  std::string initResName, initChainID, initICode, endResName, endChainID, endICode, curAtom, curResName, curChainID, curICode, prevAtom, prevResName, prevChainID, prevICode;
3133  int initSeqNum, endSeqNum, curResSeq, prevResSeq;
3134 
3135  auto r2 = db["struct_sheet_range"].find_first(key("sheet_id") == sheetID and key("id") == rangeID2);
3136 
 // same doubled targets as for the first strand above
3137  cif::tie(initResName, initICode, endResName, endICode,
3138  initResName, initChainID, initSeqNum, endResName, endChainID, endSeqNum) = r2.get("beg_label_comp_id", "pdbx_beg_PDB_ins_code", "end_label_comp_id",
3139  "pdbx_end_PDB_ins_code", "beg_auth_comp_id", "beg_auth_asym_id", "beg_auth_seq_id",
3140  "end_auth_comp_id", "end_auth_asym_id", "end_auth_seq_id");
3141 
 // hydrogen bond rows linking range_id_1 and range_id_2 of this sheet
3142  auto h = db["pdbx_struct_sheet_hbond"].find(key("sheet_id") == sheetID and key("range_id_1") == rangeID1 and key("range_id_2") == rangeID2);
3143 
3144  if (h.empty())
3145  {
 // no hbond row: write the strand without the registration columns
3146  pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense) << std::endl;
3147  }
3148  else
3149  {
 // Only the first hbond row is used. The "cur" values come from the
 // range_2 columns and the "prev" values from the range_1 columns.
3150  std::string compID[2];
3151  cif::tie(compID[0], compID[1]) = h.front().get("range_2_label_comp_id", "range_1_label_comp_id");
3152 
3153  cif::tie(curAtom, curResName, curResSeq, curChainID, curICode, prevAtom, prevResName, prevResSeq, prevChainID, prevICode) = h.front().get("range_2_auth_atom_id", "range_2_auth_comp_id", "range_2_auth_seq_id", "range_2_auth_asym_id", "range_2_PDB_ins_code",
3154  "range_1_auth_atom_id", "range_1_auth_comp_id", "range_1_auth_seq_id", "range_1_auth_asym_id", "range_1_PDB_ins_code");
3155 
 // atom names are converted using the label comp ids fetched above
3156  curAtom = cif2pdbAtomName(curAtom, compID[0], db);
3157  prevAtom = cif2pdbAtomName(prevAtom, compID[1], db);
3158 
3159  pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d "
3160  "%-4.4s%3.3s %1.1s%4d%1.1s %-4.4s%3.3s %1.1s%4d%1.1s",
3161  rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense, curAtom, curResName, curChainID, curResSeq, curICode, prevAtom, prevResName, prevChainID, prevResSeq, prevICode)
3162  << std::endl;
3163  }
3164 
 // NOTE(review): incremented once per struct_sheet_order row, so the
 // extra first-strand line written above is not included in numSheet --
 // confirm this is the intended count.
3165  ++numSheet;
3166  }
3167  }
3168 
3169  return std::make_tuple(numHelix, numSheet);
3170 }
3171 
3172 void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
3173 {
3174  // SSBOND
3175  // have to filter out alts
3176  std::set<std::tuple<char, int, char, char, int, char>> ssSeen;
3177 
3178  int nr = 1;
3179  for (auto r : db["struct_conn"].find(key("conn_type_id") == "disulf"))
3180  {
3181  std::string chainID1, icode1, chainID2, icode2, sym1, sym2;
3182  float Length;
3183  int seqNum1, seqNum2;
3184 
3185  cif::tie(
3186  chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) =
3187  r.get("ptnr1_auth_asym_id", "ptnr1_auth_seq_id", "pdbx_ptnr1_PDB_ins_code",
3188  "ptnr2_auth_asym_id", "ptnr2_auth_seq_id", "pdbx_ptnr2_PDB_ins_code",
3189  "ptnr1_symmetry", "ptnr2_symmetry", "pdbx_dist_value");
3190 
3191  auto n = ssSeen.emplace(chainID1[0], seqNum1, icode1[0], chainID2[0], seqNum2, icode2[0]);
3192  if (n.second == false)
3193  continue;
3194 
3195  sym1 = cif2pdbSymmetry(sym1);
3196  sym2 = cif2pdbSymmetry(sym2);
3197 
3198  pdbFile << cif::format("SSBOND %3d CYS %1.1s %4d%1.1s CYS %1.1s %4d%1.1s %6.6s %6.6s %5.2f", nr, chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) << std::endl;
3199 
3200  ++nr;
3201  }
3202 
3203  // LINK
3204 
3205  for (auto r : db["struct_conn"].find(key("conn_type_id") == "metalc" or key("conn_type_id") == "covale"))
3206  {
3207  std::string name1, altLoc1, resName1, chainID1, iCode1, name2, altLoc2, resName2, chainID2, iCode2, sym1, sym2, Length;
3208  int resSeq1, resSeq2;
3209 
3210  cif::tie(name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2, Length) =
3211  r.get("ptnr1_label_atom_id", "pdbx_ptnr1_label_alt_id", "ptnr1_label_comp_id", "ptnr1_auth_asym_id", "ptnr1_auth_seq_id", "pdbx_ptnr1_PDB_ins_code",
3212  "ptnr2_label_atom_id", "pdbx_ptnr2_label_alt_id", "ptnr2_label_comp_id", "ptnr2_auth_asym_id", "ptnr2_auth_seq_id", "pdbx_ptnr2_PDB_ins_code",
3213  "ptnr1_symmetry", "ptnr2_symmetry", "pdbx_dist_value");
3214 
3215  std::string compID[2];
3216 
3217  cif::tie(compID[0], compID[1]) = r.get("ptnr1_label_comp_id", "ptnr2_label_comp_id");
3218 
3219  name1 = cif2pdbAtomName(name1, compID[0], db);
3220  name2 = cif2pdbAtomName(name2, compID[1], db);
3221 
3222  sym1 = cif2pdbSymmetry(sym1);
3223  sym2 = cif2pdbSymmetry(sym2);
3224 
3225  pdbFile << cif::format("LINK %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %6.6s %6.6s", name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2);
3226 
3227  if (not Length.empty())
3228  pdbFile << cif::format(" %5.2f", stod(Length));
3229 
3230  pdbFile << std::endl;
3231  }
3232 
3233  // CISPEP
3234 
3235  for (auto r : db["struct_mon_prot_cis"])
3236  {
3237  std::string serNum, pep1, chainID1, icode1, pep2, chainID2, icode2, modNum;
3238  int seqNum1, seqNum2;
3239  float measure;
3240 
3241  cif::tie(serNum, pep1, chainID1, seqNum1, icode1, pep2, chainID2, seqNum2, icode2, modNum, measure) =
3242  r.get("pdbx_id", "label_comp_id", "auth_asym_id", "auth_seq_id", "pdbx_PDB_ins_code",
3243  "pdbx_label_comp_id_2", "pdbx_auth_asym_id_2", "pdbx_auth_seq_id_2", "pdbx_PDB_ins_code_2",
3244  "pdbx_PDB_model_num", "pdbx_omega_angle");
3245 
3246  pdbFile << cif::format("CISPEP %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s %3.3s %6.2f",
3247  serNum, pep1, chainID1, seqNum1, icode1, pep2, chainID2, seqNum2, icode2, modNum, measure) << std::endl;
3248  }
3249 }
3250 
3251 int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
3252 {
3253  int numSite = 0;
3254 
3255  // SITE
3256 
3257  std::map<std::string, std::deque<std::string>> sites;
3258 
3259  for (auto r : db["struct_site_gen"])
3260  {
3261  std::string siteID, resName, chainID, iCode;
3262  int seq;
3263 
3264  cif::tie(siteID, resName, chainID, seq, iCode) =
3265  r.get("site_id", "auth_comp_id", "auth_asym_id", "auth_seq_id", "pdbx_auth_ins_code");
3266 
3267  sites[siteID].push_back(cif::format("%3.3s %1.1s%4d%1.1s ", resName, chainID, seq, iCode).str());
3268  }
3269 
3270  for (auto s : sites)
3271  {
3272  std::string siteID = std::get<0>(s);
3273  std::deque<std::string> &res = std::get<1>(s);
3274 
3275  size_t numRes = res.size();
3276 
3277  int nr = 1;
3278  while (res.empty() == false)
3279  {
3280  pdbFile << cif::format("SITE %3d %3.3s %2d ", nr, siteID, numRes);
3281 
3282  for (int i = 0; i < 4; ++i)
3283  {
3284  if (not res.empty())
3285  {
3286  pdbFile << res.front();
3287  res.pop_front();
3288  }
3289  else
3290  pdbFile << std::string(11, ' ');
3291  }
3292 
3293  pdbFile << std::endl;
3294  ++nr;
3295  ++numSite;
3296  }
3297  }
3298 
3299  return numSite;
3300 }
3301 
3302 void WriteCrystallographic(std::ostream &pdbFile, const datablock &db)
3303 {
3304  auto r = db["symmetry"].find_first(key("entry_id") == db.name());
3305  std::string symmetry = r["space_group_name_H-M"].as<std::string>();
3306 
3307  r = db["cell"].find_first(key("entry_id") == db.name());
3308 
3309  pdbFile << cif::format("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11.11s%4d", r["length_a"].as<double>(), r["length_b"].as<double>(), r["length_c"].as<double>(), r["angle_alpha"].as<double>(), r["angle_beta"].as<double>(), r["angle_gamma"].as<double>(), symmetry, r["Z_PDB"].as<int>()) << std::endl;
3310 }
3311 
3312 int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
3313 {
3314  int result = 0;
3315 
3316  for (auto r : db["database_PDB_matrix"])
3317  {
3318  pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 1, r["origx[1][1]"].as<float>(), r["origx[1][2]"].as<float>(), r["origx[1][3]"].as<float>(), r["origx_vector[1]"].as<float>()) << std::endl;
3319  pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 2, r["origx[2][1]"].as<float>(), r["origx[2][2]"].as<float>(), r["origx[2][3]"].as<float>(), r["origx_vector[2]"].as<float>()) << std::endl;
3320  pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 3, r["origx[3][1]"].as<float>(), r["origx[3][2]"].as<float>(), r["origx[3][3]"].as<float>(), r["origx_vector[3]"].as<float>()) << std::endl;
3321  result += 3;
3322  break;
3323  }
3324 
3325  for (auto r : db["atom_sites"])
3326  {
3327  pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 1, r["fract_transf_matrix[1][1]"].as<float>(), r["fract_transf_matrix[1][2]"].as<float>(), r["fract_transf_matrix[1][3]"].as<float>(), r["fract_transf_vector[1]"].as<float>()) << std::endl;
3328  pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 2, r["fract_transf_matrix[2][1]"].as<float>(), r["fract_transf_matrix[2][2]"].as<float>(), r["fract_transf_matrix[2][3]"].as<float>(), r["fract_transf_vector[2]"].as<float>()) << std::endl;
3329  pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 3, r["fract_transf_matrix[3][1]"].as<float>(), r["fract_transf_matrix[3][2]"].as<float>(), r["fract_transf_matrix[3][3]"].as<float>(), r["fract_transf_vector[3]"].as<float>()) << std::endl;
3330  result += 3;
3331  break;
3332  }
3333 
3334  int nr = 1;
3335  for (auto r : db["struct_ncs_oper"])
3336  {
3337  std::string given = r["code"] == "given" ? "1" : "";
3338 
3339  pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 1, nr, r["matrix[1][1]"].as<float>(), r["matrix[1][2]"].as<float>(), r["matrix[1][3]"].as<float>(), r["vector[1]"].as<float>(), given) << std::endl;
3340  pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 2, nr, r["matrix[2][1]"].as<float>(), r["matrix[2][2]"].as<float>(), r["matrix[2][3]"].as<float>(), r["vector[2]"].as<float>(), given) << std::endl;
3341  pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 3, nr, r["matrix[3][1]"].as<float>(), r["matrix[3][2]"].as<float>(), r["matrix[3][3]"].as<float>(), r["vector[3]"].as<float>(), given) << std::endl;
3342 
3343  ++nr;
3344  result += 3;
3345  }
3346 
3347  return result;
3348 }
3349 
// Write the coordinate lines for one model: for every atom_site row that
// belongs to model_nr a coordinate line (record name taken from group_PDB),
// an ANISOU line when atom_site_anisotrop has a row with the same id, and TER
// lines at chain ends.
//
// pdbFile                    stream receiving the records
// db                         datablock to read from
// last_resseq_for_chain_map  per chain id the (residue name, sequence number,
//                            insertion code) used to recognise the chain end
// terminatedChains           chain ids that already received a TER line; a
//                            chain id is added here when its TER is written
// model_nr                   model number to write
//
// Returns the tuple (numCoord, numTer): the number of coordinate lines and
// the number of TER lines written.
// Throws std::runtime_error when a chain id is longer than one character.
3350 std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datablock &db,
3351  const std::map<std::string, std::tuple<std::string, int, std::string>> &last_resseq_for_chain_map,
3352  std::set<std::string> &terminatedChains, int model_nr)
3353 {
3354  using namespace cif::literals;
3355 
3356  int numCoord = 0, numTer = 0;
3357 
3358  auto &atom_site = db["atom_site"];
3359  auto &atom_site_anisotrop = db["atom_site_anisotrop"];
3360  auto &entity = db["entity"];
3361  auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
3362  //auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
3363  auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
3364 
 // serial is shared by coordinate and TER lines and starts at 1
3365  int serial = 1;
3366  auto ri = atom_site.begin();
3367 
 // These are declared outside the loop on purpose: at the top of each
 // iteration they still describe the previously written atom, which is
 // what the TER test below compares the next row against.
3368  std::string id, group, name, altLoc, resName, chainID, iCode, element;
3369  int resSeq = 0, charge;
3370 
3371  for (;;)
3372  {
 // Look ahead at the next row without consuming it. At the end of the
 // category these stay empty / zero.
3373  std::string nextResName, nextChainID, nextICode, modelNum;
3374  int nextResSeq = 0;
3375 
3376  if (ri != atom_site.end())
3377  cif::tie(nextResName, nextChainID, nextICode, nextResSeq, modelNum) =
3378  (*ri).get("label_comp_id", "auth_asym_id", "pdbx_PDB_ins_code", "auth_seq_id", "pdbx_PDB_model_num");
3379 
 // Skip rows that belong to another model. When the model number cannot
 // be parsed a message is printed if VERBOSE > 0 and nr keeps its
 // initial value 0 for the comparison with model_nr.
 // NOTE(review): std::from_chars is declared in <charconv>; that header is
 // not among the includes at the top of this file -- presumably it comes
 // in through another header, confirm.
3380  if (modelNum.empty() == false)
3381  {
3382  int nr = 0;
3383  auto r = std::from_chars(modelNum.data(), modelNum.data() + modelNum.length(), nr);
3384  if (r.ec != std::errc())
3385  {
3386  if (VERBOSE > 0)
3387  std::cerr << "Model number '" << modelNum << "' is not a valid integer" << std::endl;
3388  }
3389 
3390  if (nr != model_nr)
3391  {
3392  ++ri;
3393  continue;
3394  }
3395  }
3396 
 // Decide whether the chain of the previously written atom ends here.
 // That is the case when the next row is in another chain, or when the
 // residue changes and the previous residue is the one recorded for the
 // chain in last_resseq_for_chain_map (or the chain is not in that map).
3397  if (chainID.empty() == false and terminatedChains.count(chainID) == 0)
3398  {
3399  bool terminate = nextChainID != chainID;
3400 
3401  if (not terminate)
3402  terminate =
3403  (nextResSeq != resSeq or iCode != nextICode) and
3404  (last_resseq_for_chain_map.count(chainID) == false or last_resseq_for_chain_map.at(chainID) == make_tuple(resName, resSeq, iCode));
3405 
3406  if (terminate)
3407  {
3408  pdbFile << cif::format("TER %5d %3.3s %1.1s%4d%1.1s", serial, resName, chainID, resSeq, iCode) << std::endl;
3409 
3410  ++serial;
3411  terminatedChains.insert(chainID);
3412 
3413  ++numTer;
3414  }
3415  }
3416 
 // The end test comes after the TER test so that the last chain can
 // still be terminated.
3417  if (ri == atom_site.end())
3418  break;
3419 
3420  auto r = *ri++;
3421 
 // Second model test, now on the integer value; an exception thrown while
 // reading the value is ignored and the row is then written.
3422  try
3423  {
3424  if (r["pdbx_PDB_model_num"].as<int>() != model_nr)
3425  continue;
3426  }
3427  catch (...)
3428  { /* perhaps no model number here */
3429  }
3430 
3431  float x, y, z, occupancy, tempFactor;
3432 
3433  cif::tie(id, group, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, charge) =
3434  r.get("id", "group_PDB", "label_atom_id", "label_alt_id", "auth_comp_id", "auth_asym_id", "auth_seq_id",
3435  "pdbx_PDB_ins_code", "Cartn_x", "Cartn_y", "Cartn_z", "occupancy", "B_iso_or_equiv", "type_symbol", "pdbx_formal_charge");
3436 
 // For branched and polymer entities the sequence number is replaced by
 // the auth_seq_num of the scheme row that has this atom's label_asym_id
 // and a pdb_seq_num equal to the number read above.
3437  int entity_id = r.get<int>("label_entity_id");
3438  auto type = entity.find1<std::string>("id"_key == entity_id, "type");
3439 
3440  if (type == "branched") // find the real auth_seq_num, since sugars have their auth_seq_num reused as sugar number... sigh.
3441  resSeq = pdbx_branch_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
3442  // else if (type == "non-polymer") // same for non-polymers
3443  // resSeq = pdbx_nonpoly_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
3444  else if (type == "polymer")
3445  resSeq = pdbx_poly_seq_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
3446 
3447  if (chainID.length() > 1)
3448  throw std::runtime_error("Chain ID " + chainID + " won't fit into a PDB file");
3449 
 // Atom names shorter than four characters get a leading space, unless
 // the element symbol has more than one character and the name starts
 // with those first two characters (compared case-insensitively).
3450  if (name.length() < 4 and (element.length() == 1 or std::toupper(name[0]) != std::toupper(element[0]) or std::toupper(name[1]) != std::toupper(element[1])))
3451  name.insert(name.begin(), ' ');
3452 
 // a non-zero charge is written as the number followed by '+' or '-'
3453  std::string sCharge;
3454  if (charge != 0)
3455  sCharge = std::to_string(charge) + (charge > 0 ? '+' : '-');
3456 
3457  pdbFile << cif::format("%-6.6s%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %8.3f%8.3f%8.3f%6.2f%6.2f %2.2s%2.2s", group, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, sCharge) << std::endl;
3458 
3459  ++numCoord;
3460 
 // The ANISOU line uses the same serial as the coordinate line; the U
 // values are multiplied by 10000 and rounded to integers.
3461  auto ai = atom_site_anisotrop.find_first(key("id") == id);
3462  if (not ai.empty())
3463  //
3464  // auto ai = find_if(atom_site_anisotrop.begin(), atom_site_anisotrop.end(), [id](row_handle r) -> bool { return r["id"] == id; });
3465  // if (ai != atom_site_anisotrop.end())
3466  {
3467  float u11, u22, u33, u12, u13, u23;
3468 
3469  tie(u11, u22, u33, u12, u13, u23) =
3470  ai.get("U[1][1]", "U[2][2]", "U[3][3]", "U[1][2]", "U[1][3]", "U[2][3]");
3471 
3472  pdbFile << cif::format("ANISOU%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %7d%7d%7d%7d%7d%7d %2.2s%2.2s", serial, name, altLoc, resName, chainID, resSeq, iCode, std::lrintf(u11 * 10000), std::lrintf(u22 * 10000), std::lrintf(u33 * 10000), std::lrintf(u12 * 10000), std::lrintf(u13 * 10000), std::lrintf(u23 * 10000), element, sCharge) << std::endl;
3473  }
3474 
3475  ++serial;
3476  }
3477 
3478  return std::make_tuple(numCoord, numTer);
3479 }
3480 
3481 std::tuple<int, int> WriteCoordinate(std::ostream &pdbFile, const datablock &db)
3482 {
3483  // residues known from seqres
3484  // map<tuple<std::string,int,std::string>,std::string> res2chain_map;
3485  std::map<std::string, std::tuple<std::string, int, std::string>> last_resseq_for_chain_map;
3486 
3487  for (auto r : db["pdbx_poly_seq_scheme"])
3488  {
3489  std::string chainID, resName, iCode;
3490  int resSeq;
3491 
3492  if (r["auth_seq_num"].empty())
3493  continue;
3494 
3495  cif::tie(chainID, resName, resSeq, iCode) = r.get("pdb_strand_id", "pdb_mon_id", "auth_seq_num", "pdb_ins_code");
3496 
3497  last_resseq_for_chain_map[chainID] = make_tuple(resName, resSeq, iCode);
3498  // res2chain_map[make_tuple(resName, resSeq, iCode)] = chainID;
3499  }
3500 
3501  // collect known model numbers
3502  std::set<int> models;
3503  try
3504  {
3505  for (auto r : db["atom_site"])
3506  models.insert(r["pdbx_PDB_model_num"].as<int>());
3507  }
3508  catch (...)
3509  {
3510  }
3511 
3512  std::tuple<int, int> result;
3513 
3514  if (models.empty() or models == std::set<int>{ 0 })
3515  {
3516  std::set<std::string> TERminatedChains;
3517  result = WriteCoordinatesForModel(pdbFile, db, last_resseq_for_chain_map, TERminatedChains, 0);
3518  }
3519  else
3520  {
3521  for (int model_nr : models)
3522  {
3523  if (models.size() > 1)
3524  pdbFile << cif::format("MODEL %4d", model_nr) << std::endl;
3525 
3526  std::set<std::string> TERminatedChains;
3527  auto n = WriteCoordinatesForModel(pdbFile, db, last_resseq_for_chain_map, TERminatedChains, model_nr);
3528  if (model_nr == 1)
3529  result = n;
3530 
3531  if (models.size() > 1)
3532  pdbFile << "ENDMDL" << std::endl;
3533  }
3534  }
3535 
3536  return result;
3537 }
3538 
3539 void WritePDBHeaderLines(std::ostream &os, const datablock &db)
3540 {
3541  fill_out_streambuf fb(os);
3542  write_header_lines(os, db);
3543 }
3544 
/// Return a copy of @a s that is exactly @a l characters long.
///
/// Shorter strings are padded with spaces on the right. Longer strings are
/// cut and end in "... " to show that text was dropped; when @a l is too
/// small to hold that marker (less than four characters) the string is simply
/// cut at @a l characters.
///
/// @param s the text to fit
/// @param l the required length of the result
/// @return @a s padded or truncated to exactly @a l characters
std::string FixStringLength(const std::string &s, std::string::size_type l)
{
	auto result = s;

	if (result.length() > l)
	{
		// l is unsigned: guard against l - 4 wrapping around, which would
		// keep the whole string and append the marker on top of it.
		if (l >= 4)
			result = result.substr(0, l - 4) + "... ";
		else
			result.resize(l);
	}
	else if (result.length() < l)
		result.append(l - result.length(), ' ');

	return result;
}
3556 
3557 std::string get_HEADER_line(const datablock &db, std::string::size_type truncate_at)
3558 {
3559  // 0 1 2 3 4 5 6 7 8
3560  // HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
3561 
3562  // HEADER
3563 
3564  std::string keywords;
3565  auto &cat1 = db["struct_keywords"];
3566 
3567  for (auto r : cat1)
3568  {
3569  keywords = r["pdbx_keywords"].as<std::string>();
3570  if (keywords.length() > truncate_at - 40)
3571  keywords = keywords.substr(0, truncate_at - 44) + " ...";
3572  }
3573 
3574  std::string date;
3575  for (auto r : db["pdbx_database_status"])
3576  {
3577  date = r["recvd_initial_deposition_date"].as<std::string>();
3578  if (date.empty())
3579  continue;
3580  date = cif2pdbDate(date);
3581  break;
3582  }
3583 
3584  if (date.empty())
3585  {
3586  for (auto r : db["database_PDB_rev"])
3587  {
3588  date = r["date_original"].as<std::string>();
3589  if (date.empty())
3590  continue;
3591  date = cif2pdbDate(date);
3592  break;
3593  }
3594  }
3595 
3596  return FixStringLength(cif::format("HEADER %-40.40s%-9.9s %-4.4s", keywords, date, db.name()).str(), truncate_at);
3597 }
3598 
3599 std::string get_COMPND_line(const datablock &db, std::string::size_type truncate_at)
3600 {
3601  // COMPND
3602  using namespace std::placeholders;
3603 
3604  int molID = 0;
3605  std::vector<std::string> cmpnd;
3606 
3607  for (auto r : db["entity"])
3608  {
3609  if (r["type"] != "polymer")
3610  continue;
3611 
3612  std::string entityID = r["id"].as<std::string>();
3613 
3614  ++molID;
3615  cmpnd.push_back("MOL_ID: " + std::to_string(molID));
3616 
3617  std::string molecule = r["pdbx_description"].as<std::string>();
3618  cmpnd.push_back("MOLECULE: " + molecule);
3619 
3620  auto poly = db["entity_poly"].find(key("entity_id") == entityID);
3621  if (not poly.empty())
3622  {
3623  std::string chains = poly.front()["pdbx_strand_id"].as<std::string>();
3624  replace_all(chains, ",", ", ");
3625  cmpnd.push_back("CHAIN: " + chains);
3626  }
3627 
3628  std::string fragment = r["pdbx_fragment"].as<std::string>();
3629  if (not fragment.empty())
3630  cmpnd.push_back("FRAGMENT: " + fragment);
3631 
3632  for (auto sr : db["entity_name_com"].find(key("entity_id") == entityID))
3633  {
3634  std::string syn = sr["name"].as<std::string>();
3635  if (not syn.empty())
3636  cmpnd.push_back("SYNONYM: " + syn);
3637  }
3638 
3639  std::string mutation = r["pdbx_mutation"].as<std::string>();
3640  if (not mutation.empty())
3641  cmpnd.push_back("MUTATION: " + mutation);
3642 
3643  std::string ec = r["pdbx_ec"].as<std::string>();
3644  if (not ec.empty())
3645  cmpnd.push_back("EC: " + ec);
3646 
3647  if (r["src_method"] == "man" or r["src_method"] == "syn")
3648  cmpnd.push_back("ENGINEERED: YES");
3649 
3650  std::string details = r["details"].as<std::string>();
3651  if (not details.empty())
3652  cmpnd.push_back("OTHER_DETAILS: " + details);
3653  }
3654 
3655  return FixStringLength("COMPND " + join(cmpnd, "; "), truncate_at);
3656 }
3657 
3658 std::string get_SOURCE_line(const datablock &db, std::string::size_type truncate_at)
3659 {
3660  // SOURCE
3661 
3662  int molID = 0;
3663  std::vector<std::string> source;
3664 
3665  for (auto r : db["entity"])
3666  {
3667  if (r["type"] != "polymer")
3668  continue;
3669 
3670  std::string entityID = r["id"].as<std::string>();
3671 
3672  ++molID;
3673  source.push_back("MOL_ID: " + std::to_string(molID));
3674 
3675  if (r["src_method"] == "syn")
3676  source.push_back("SYNTHETIC: YES");
3677 
3678  auto &gen = db["entity_src_gen"];
3679  const std::pair<const char *, const char *> kGenSourceMapping[] = {
3680  { "gene_src_common_name", "ORGANISM_COMMON" },
3681  { "pdbx_gene_src_gene", "GENE" },
3682  { "gene_src_strain", "STRAIN" },
3683  { "pdbx_gene_src_cell_line", "CELL_LINE" },
3684  { "pdbx_gene_src_organelle", "ORGANELLE" },
3685  { "pdbx_gene_src_cellular_location", "CELLULAR_LOCATION" },
3686  { "pdbx_gene_src_scientific_name", "ORGANISM_SCIENTIFIC" },
3687  { "pdbx_gene_src_ncbi_taxonomy_id", "ORGANISM_TAXID" },
3688  { "pdbx_host_org_scientific_name", "EXPRESSION_SYSTEM" },
3689  { "pdbx_host_org_ncbi_taxonomy_id", "EXPRESSION_SYSTEM_TAXID" },
3690  { "pdbx_host_org_strain", "EXPRESSION_SYSTEM_STRAIN" },
3691  { "pdbx_host_org_variant", "EXPRESSION_SYSTEM_VARIANT" },
3692  { "pdbx_host_org_cellular_location", "EXPRESSION_SYSTEM_CELLULAR_LOCATION" },
3693  { "pdbx_host_org_vector_type", "EXPRESSION_SYSTEM_VECTOR_TYPE" },
3694  { "pdbx_host_org_vector", "EXPRESSION_SYSTEM_VECTOR" },
3695  { "pdbx_host_org_gene", "EXPRESSION_SYSTEM_GENE" },
3696  { "plasmid_name", "EXPRESSION_SYSTEM_PLASMID" }
3697  };
3698 
3699  for (auto gr : gen.find(key("entity_id") == entityID))
3700  {
3701  for (const auto &[cname, sname] : kGenSourceMapping)
3702  {
3703  std::string s = gr[cname].as<std::string>();
3704  if (not s.empty())
3705  source.push_back(sname + ": "s + s);
3706  }
3707  }
3708 
3709  auto &nat = db["entity_src_nat"];
3710  const std::pair<const char *, const char *> kNatSourceMapping[] = {
3711  { "common_name", "ORGANISM_COMMON" },
3712  { "strain", "STRAIN" },
3713  { "pdbx_organism_scientific", "ORGANISM_SCIENTIFIC" },
3714  { "pdbx_ncbi_taxonomy_id", "ORGANISM_TAXID" },
3715  { "pdbx_cellular_location", "CELLULAR_LOCATION" },
3716  { "pdbx_plasmid_name", "PLASMID" },
3717  { "pdbx_organ", "ORGAN" },
3718  { "details", "OTHER_DETAILS" }
3719  };
3720 
3721  for (auto nr : nat.find(key("entity_id") == entityID))
3722  {
3723  for (const auto &[cname, sname] : kNatSourceMapping)
3724  {
3725  std::string s = nr[cname].as<std::string>();
3726  if (not s.empty())
3727  source.push_back(sname + ": "s + s);
3728  }
3729  }
3730  }
3731 
3732  return FixStringLength("SOURCE " + join(source, "; "), truncate_at);
3733 }
3734 
3735 std::string get_AUTHOR_line(const datablock &db, std::string::size_type truncate_at)
3736 {
3737  // AUTHOR
3738  std::vector<std::string> author;
3739  for (auto r : db["audit_author"])
3740  author.push_back(cif2pdbAuth(r["name"].as<std::string>()));
3741 
3742  return FixStringLength("AUTHOR " + join(author, "; "), truncate_at);
3743 }
3744 
3745 // --------------------------------------------------------------------
3746 
3747 void write(std::ostream &os, const datablock &db)
3748 {
3749  fill_out_streambuf fb(os);
3750 
3751  int numRemark = 0, numHet = 0, numHelix = 0, numSheet = 0, numTurn = 0, numSite = 0, numXform = 0, numCoord = 0, numTer = 0, numConect = 0, numSeq = 0;
3752 
3753  WriteTitle(os, db);
3754 
3755  int savedLineCount = fb.get_line_count();
3756  // numRemark = WriteRemarks(pdbFile, db);
3757  WriteRemarks(os, db);
3758  numRemark = fb.get_line_count() - savedLineCount;
3759 
3760  numSeq = WritePrimaryStructure(os, db);
3761  numHet = WriteHeterogen(os, db);
3762  std::tie(numHelix, numSheet) = WriteSecondaryStructure(os, db);
3763  WriteConnectivity(os, db);
3764  numSite = WriteMiscellaneousFeatures(os, db);
3765  WriteCrystallographic(os, db);
3766  numXform = WriteCoordinateTransformation(os, db);
3767  std::tie(numCoord, numTer) = WriteCoordinate(os, db);
3768 
3769  os << cif::format("MASTER %5d 0%5d%5d%5d%5d%5d%5d%5d%5d%5d%5d", numRemark, numHet, numHelix, numSheet, numTurn, numSite, numXform, numCoord, numTer, numConect, numSeq) << std::endl
3770  << "END" << std::endl;
3771 }
3772 
3773 void write(const std::filesystem::path &p, const datablock &db)
3774 {
3775  gzio::ofstream out(p);
3776 
3777  bool writePDB = false;
3778  if (p.extension() == ".gz")
3779  writePDB = iequals(p.stem().extension().string(), ".pdb");
3780  else
3781  writePDB = iequals(p.extension().string(), ".pdb");
3782 
3783  if (writePDB)
3784  write(out, db);
3785  else
3786  db.write(out);
3787 }
3788 
3789 
3790 } // namespace cif::pdb
void WriteRemark999(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2667
void to_upper(std::string &s)
Definition: text.cpp:128
void WriteRemark3(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2112
void WriteRemark450(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2522
void replace_all(std::string &s, std::string_view what, std::string_view with)
Definition: text.cpp:134
std::tuple< int, int > WriteSecondaryStructure(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:3068
int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2700
void WriteRemarks(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2678
std::string_view text() const
Definition: cif2pdb.cpp:651
Fi(const category &cat, condition &&cond, const char *f)
Definition: cif2pdb.cpp:667
void write(std::ostream &os, const datablock &db)
Definition: cif2pdb.cpp:3747
std::string cif2pdbAuth(std::string name)
Definition: cif2pdb.cpp:71
void WriteRemark3Shelxl(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:1467
doublereal * c
doublereal * g
void WriteRemark280(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2332
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
std::string get_AUTHOR_line(const datablock &db, std::string::size_type truncate_at)
Definition: cif2pdb.cpp:3735
RM(const char *desc, int width=0, int precision=6)
Definition: cif2pdb.cpp:785
static double * y
void trim(std::string &s)
Definition: text.cpp:205
void WriteRemark2(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:605
virtual void out(std::ostream &os)
Definition: cif2pdb.cpp:710
const std::map< std::string, int > kMonths
Definition: pdb2cif.cpp:112
SEP(const char *txt, int width, int precision=6)
Definition: cif2pdb.cpp:806
void WriteRemark3Refmac(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:1139
std::string cifSoftware(const datablock &db, SoftwareType sw)
Definition: cif2pdb.cpp:117
void abs(Image< double > &op)
int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2808
void WriteRemark200(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2192
double gi
virtual void out(std::ostream &os)
Definition: cif2pdb.cpp:672
bool iequals(std::string_view a, std::string_view b)
Definition: text.cpp:59
void WriteRemark3ProlSQ(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:1993
doublereal * x
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
void WriteRemark3Phenix(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:1547
std::string cif2pdbSymmetry(std::string s)
Definition: cif2pdb.cpp:82
doublereal * d
std::string cif2pdbDate(const std::string &d)
Definition: cif2pdb.cpp:47
void WriteCrystallographic(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:3302
void WriteRemark800(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2642
glob_log first
doublereal * b
Ff(const category &cat, condition &&cond, const char *f)
Definition: cif2pdb.cpp:705
struct _constraint * cs
std::ostream & operator<<(std::ostream &os, FBase &&fld)
Definition: cif2pdb.cpp:776
size_t WriteContinuedLine(std::ostream &pdbFile, std::string header, int &count, int cLen, std::string text, std::string::size_type lStart=0)
Definition: cif2pdb.cpp:187
void write_header_lines(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:308
void WriteRemark3BusterTNT(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:824
viol type
void WriteRemark350(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2393
RM< 3 > RM3
Definition: cif2pdb.cpp:795
double * f
std::vector< std::string > MapAsymIDs2ChainIDs(const std::vector< std::string > &asymIDs, const datablock &db)
Definition: cif2pdb.cpp:164
const char * mText
Definition: cif2pdb.cpp:812
void WriteRemark465(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2533
row_handle mRow
Definition: cif2pdb.cpp:656
int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:3251
const char * mDesc
Definition: cif2pdb.cpp:791
double z
virtual ~FBase()
Definition: cif2pdb.cpp:633
Fs(const category &cat, condition &&cond, const char *f, int remarkNr=3)
Definition: cif2pdb.cpp:744
Fi(row_handle r, const char *f)
Definition: cif2pdb.cpp:663
std::vector< std::string > word_wrap(const std::string &text, size_t width)
Definition: text.cpp:488
int VERBOSE
Definition: utilities.cpp:58
void WriteRemark3NuclSQ(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:1889
void WriteRemark3XPlor(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:1774
int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:3312
std::tuple< int, int > WriteCoordinatesForModel(std::ostream &pdbFile, const datablock &db, const std::map< std::string, std::tuple< std::string, int, std::string >> &last_resseq_for_chain_map, std::set< std::string > &terminatedChains, int model_nr)
Definition: cif2pdb.cpp:3350
std::string get_SOURCE_line(const datablock &db, std::string::size_type truncate_at)
Definition: cif2pdb.cpp:3658
void sort(struct DCEL_T *dcel)
Definition: sorting.cpp:18
std::string cif2pdbAtomName(std::string name, std::string resName, const datablock &db)
Definition: cif2pdb.cpp:90
Ff(row_handle r, const char *f)
Definition: cif2pdb.cpp:701
int m
std::tuple< int, int > WriteCoordinate(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:3481
void WriteRemark470(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2580
void WriteTitle(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:535
std::string FixStringLength(const std::string &s, std::string::size_type l)
Definition: cif2pdb.cpp:3545
std::string get_COMPND_line(const datablock &db, std::string::size_type truncate_at)
Definition: cif2pdb.cpp:3599
void WriteRemark1(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:587
#define SEP
Definition: utils.h:10
void WriteRemark3CNS(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:1001
const char * mField
Definition: cif2pdb.cpp:657
FBase(row_handle r, const char *f)
Definition: cif2pdb.cpp:638
void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:3172
float r2
virtual void out(std::ostream &os)
Definition: cif2pdb.cpp:750
void WriteRemark610(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2637
std::string to_string(bond_type bondType)
Definition: compound.cpp:43
void WritePDBHeaderLines(std::ostream &os, const datablock &db)
Definition: cif2pdb.cpp:3539
Fs(row_handle r, const char *f, int remarkNr=3)
Definition: cif2pdb.cpp:739
void writePDB(const FileName &fnPDB, bool renumber, const std::vector< std::string > &remarks, const callable &atomList)
Write rich phantom to PDB file.
Definition: pdb.cpp:725
int * n
std::string get_HEADER_line(const datablock &db, std::string::size_type truncate_at)
Definition: cif2pdb.cpp:3557
FBase(const category &cat, condition &&cond, const char *f)
Definition: cif2pdb.cpp:643
doublereal * a
void WriteRemark400(std::ostream &pdbFile, const datablock &db)
Definition: cif2pdb.cpp:2512
size_t WriteOneContinuedLine(std::ostream &pdbFile, std::string header, int cLen, std::string line, int lStart=0)
Definition: cif2pdb.cpp:222
size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle r, int reference)
Definition: cif2pdb.cpp:228
float r1