Xmipp  v3.23.11-Nereus
tls.cpp
Go to the documentation of this file.
1 /*
2  Created by: Maarten L. Hekkelman
3  Date: dinsdag 07 november, 2017
4 
5  Copyright 2017 NKI AVL
6 
7  Permission is hereby granted, free of charge, to any person obtaining
8  a copy of this software and associated documentation files (the
9  "Software"), to deal in the Software without restriction, including
10  without limitation the rights to use, copy, modify, merge, publish,
11  distribute, sublicense, and/or sell copies of the Software, and to
12  permit persons to whom the Software is furnished to do so, subject to
13  the following conditions:
14 
15  The above copyright notice and this permission notice shall be
16  included in all copies or substantial portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26 
27 // #include <sys/ioctl.h>
28 // #include <termios.h>
29 
30 #include "cif++.hpp"
31 #include "cif++/pdb/tls.hpp"
32 
33 #include <iomanip>
34 #include <iostream>
35 
36 namespace cif
37 {
38 
39 const int
42 
43 // --------------------------------------------------------------------
44 // We parse selection statements and create a selection expression tree
45 // which is then interpreted by setting the selected flag for the
46 // residues. After that, the selected ranges are collected and printed.
47 
49 {
50  std::string chainID;
51  int seqNr = 0;
52  char iCode;
53  std::string name;
54  bool selected;
55 
56  std::string asymID;
57  int seqID = 0;
58 
59  bool operator==(const tls_residue &rhs) const
60  {
61  return chainID == rhs.chainID and
62  seqNr == rhs.seqNr and
63  iCode == rhs.iCode and
64  iequals(name, rhs.name) and
65  selected == rhs.selected;
66  }
67 };
68 
69 void dump_selection(const std::vector<tls_residue> &selected, size_t indentLevel)
70 {
71  std::string indent(indentLevel * 2, ' ');
72 
73  auto i = selected.begin();
74  bool first = true;
75 
76  // First print in PDB space
77  while (i != selected.end())
78  {
79  auto b = find_if(i, selected.end(), [](auto s) -> bool
80  { return s.selected; });
81  if (b == selected.end())
82  break;
83 
84  if (first)
85  std::cout << indent << "PDB:" << std::endl;
86  first = false;
87 
88  auto e = find_if(b, selected.end(), [b](auto s) -> bool
89  { return s.chainID != b->chainID or not s.selected; });
90 
91  std::cout << indent << " >> " << b->chainID << ' ' << b->seqNr << ':' << (e - 1)->seqNr << std::endl;
92  i = e;
93  }
94 
95  // Then in mmCIF space
96 
97  if (not first)
98  std::cout << indent << "mmCIF:" << std::endl;
99 
100  i = selected.begin();
101  while (i != selected.end())
102  {
103  auto b = find_if(i, selected.end(), [](auto s) -> bool
104  { return s.selected; });
105  if (b == selected.end())
106  break;
107 
108  auto e = find_if(b, selected.end(), [b](auto s) -> bool
109  { return s.asymID != b->asymID or not s.selected; });
110 
111  std::string asymID = b->asymID;
112  int from = b->seqID, to = from;
113 
114  for (auto j = b + 1; j != e; ++j)
115  {
116  if (j->seqID == to + 1)
117  to = j->seqID;
118  else if (j->seqID != to) // probably an insertion code
119  {
120  if (from == kNoSeqNum or to == kNoSeqNum)
121  std::cout << indent << " >> " << asymID << std::endl;
122  else
123  std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;
124  asymID = b->asymID;
125  from = to = b->seqID;
126  }
127  }
128 
129  if (from == kNoSeqNum or to == kNoSeqNum)
130  std::cout << indent << " >> " << asymID << std::endl;
131  else
132  std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;
133 
134  i = e;
135  }
136 
137  if (first)
138  {
139  if (isatty(STDOUT_FILENO))
140  std::cout << indent << cif::coloured("Empty selection") << std::endl;
141  else
142  std::cout << indent << "Empty selection" << std::endl;
143  }
144 }
145 
146 std::vector<std::tuple<std::string, int, int>> tls_selection::get_ranges(cif::datablock &db, bool pdbNamespace) const
147 {
148  std::vector<tls_residue> selected;
149 
150  // Collect the residues from poly seq scheme...
151  for (auto r : db["pdbx_poly_seq_scheme"])
152  {
153  std::string chain, seqNr, iCode, name;
154 
155  std::string asymID;
156  int seqID = 0;
157 
158  if (pdbNamespace)
159  cif::tie(chain, seqNr, iCode, name, asymID, seqID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id", "seq_id");
160  else
161  {
162  cif::tie(chain, seqNr, name) = r.get("asym_id", "seq_id", "mon_id");
163  asymID = chain;
164  seqID = stoi(seqNr);
165  }
166 
167  if (seqNr.empty())
168  continue;
169 
170  if (iCode.length() > 1)
171  throw std::runtime_error("invalid iCode");
172 
173  selected.push_back({ chain, stoi(seqNr), iCode[0], name, false, asymID, seqID });
174  }
175 
176  // ... those from the nonpoly scheme
177  for (auto r : db["pdbx_nonpoly_scheme"])
178  {
179  std::string chain, seqNr, iCode, name, asymID;
180 
181  if (pdbNamespace)
182  {
183  cif::tie(chain, seqNr, iCode, name, asymID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
184  if (seqNr.empty())
185  continue;
186  }
187  else
188  {
189  cif::tie(chain, name) = r.get("asym_id", "mon_id");
190  asymID = chain;
191  seqNr = "0";
192  }
193 
194  if (iequals(name, "HOH") or iequals(name, "H2O"))
195  continue;
196 
197  if (iCode.length() > 1)
198  throw std::runtime_error("invalid iCode");
199 
200  selected.push_back({ chain, stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum });
201  }
202 
203  // ... those from the nonpoly scheme
204  for (auto r : db["pdbx_branch_scheme"])
205  {
206  std::string chain, seqNr, iCode, name, asymID;
207 
208  if (pdbNamespace)
209  {
210  cif::tie(chain, seqNr, iCode, name, asymID) = r.get("auth_asym_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
211  if (seqNr.empty())
212  continue;
213  }
214  else
215  {
216  cif::tie(chain, name) = r.get("asym_id", "mon_id");
217  asymID = chain;
218  seqNr = "0";
219  }
220 
221  if (iCode.length() > 1)
222  throw std::runtime_error("invalid iCode");
223 
224  selected.push_back({ chain, stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum });
225  }
226 
227  // selected might consist of multiple ranges
228  // output per chain
229 
230  stable_sort(selected.begin(), selected.end(), [](auto &a, auto &b) -> bool
231  {
232  int d = a.chainID.compare(b.chainID);
233  if (d == 0)
234  d = a.seqNr - b.seqNr;
235  return d < 0; });
236 
237  collect_residues(db, selected);
238 
239  std::vector<std::tuple<std::string, int, int>> result;
240 
241  if (pdbNamespace)
242  {
243  auto i = selected.begin();
244 
245  while (i != selected.end())
246  {
247  auto b = find_if(i, selected.end(), [](auto s) -> bool
248  { return s.selected; });
249  if (b == selected.end())
250  break;
251 
252  auto e = find_if(b, selected.end(), [b](auto s) -> bool
253  { return s.chainID != b->chainID or not s.selected; });
254 
255  // return ranges with strict increasing sequence numbers.
256  // So when there's a gap in the sequence we split the range.
257  // Beware of iCodes though
258  result.push_back(std::make_tuple(b->chainID, b->seqNr, b->seqNr));
259  for (auto j = b + 1; j != e; ++j)
260  {
261  if (j->seqNr == std::get<2>(result.back()) + 1)
262  std::get<2>(result.back()) = j->seqNr;
263  else if (j->seqNr != std::get<2>(result.back())) // probably an insertion code
264  result.push_back(std::make_tuple(b->chainID, j->seqNr, j->seqNr));
265  }
266 
267  i = e;
268  }
269  }
270  else
271  {
272  auto i = selected.begin();
273 
274  while (i != selected.end())
275  {
276  auto b = find_if(i, selected.end(), [](auto s) -> bool
277  { return s.selected; });
278  if (b == selected.end())
279  break;
280 
281  auto e = find_if(b, selected.end(), [b](auto s) -> bool
282  { return s.asymID != b->asymID or not s.selected; });
283 
284  // return ranges with strict increasing sequence numbers.
285  // So when there's a gap in the sequence we split the range.
286  // Beware of iCodes though
287  result.push_back(std::make_tuple(b->asymID, b->seqID, b->seqID));
288  for (auto j = b + 1; j != e; ++j)
289  {
290  if (j->seqID == std::get<2>(result.back()) + 1)
291  std::get<2>(result.back()) = j->seqID;
292  else if (j->seqID != std::get<2>(result.back())) // probably an insertion code
293  result.push_back(std::make_tuple(b->asymID, j->seqID, j->seqID));
294  }
295 
296  i = e;
297  }
298  }
299 
300  for (auto &&[name, i1, i2] : result)
301  {
302  if (i1 == kNoSeqNum) i1 = 0;
303  if (i2 == kNoSeqNum) i2 = 0;
304  }
305 
306  return result;
307 }
308 
309 struct tls_selection_not : public tls_selection
310 {
311  tls_selection_not(std::unique_ptr<tls_selection> selection)
312  : selection(selection.release())
313  {
314  }
315 
316  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
317  {
318  selection->collect_residues(db, residues, indentLevel + 1);
319 
320  for (auto &r : residues)
321  r.selected = not r.selected;
322 
323  if (cif::VERBOSE > 0)
324  {
325  std::cout << std::string(indentLevel * 2, ' ') << "NOT" << std::endl;
326  dump_selection(residues, indentLevel);
327  }
328  }
329 
330  std::unique_ptr<tls_selection> selection;
331 };
332 
333 struct tls_selection_all : public tls_selection
334 {
336 
337  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
338  {
339  for (auto &r : residues)
340  r.selected = true;
341 
342  if (cif::VERBOSE > 0)
343  {
344  std::cout << std::string(indentLevel * 2, ' ') << "ALL" << std::endl;
345  dump_selection(residues, indentLevel);
346  }
347  }
348 };
349 
351 {
352  tls_selection_chain(const std::string &chainID)
353  : m_chain(chainID)
354  {
355  }
356 
357  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
358  {
359  bool allChains = m_chain == "*";
360 
361  for (auto &r : residues)
362  r.selected = allChains or r.chainID == m_chain;
363 
364  if (cif::VERBOSE > 0)
365  {
366  std::cout << std::string(indentLevel * 2, ' ') << "CHAIN " << m_chain << std::endl;
367  dump_selection(residues, indentLevel);
368  }
369  }
370 
371  std::string m_chain;
372 };
373 
375 {
377  : m_seq_nr(seqNr)
378  , m_icode(iCode)
379  {
380  }
381 
382  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
383  {
384  for (auto &r : residues)
385  r.selected = r.seqNr == m_seq_nr and r.iCode == m_icode;
386 
387  if (cif::VERBOSE > 0)
388  {
389  std::cout << std::string(indentLevel * 2, ' ') << "ResID " << m_seq_nr << (m_icode ? std::string{ m_icode } : "") << std::endl;
390  dump_selection(residues, indentLevel);
391  }
392  }
393 
394  int m_seq_nr;
395  char m_icode;
396 };
397 
399 {
401  : m_first(first)
402  , m_last(last)
403  {
404  }
405 
406  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
407  {
408  for (auto &r : residues)
409  {
410  r.selected = ((r.seqNr >= m_first or m_first == kResidueNrWildcard) and
411  (r.seqNr <= m_last or m_last == kResidueNrWildcard));
412  }
413 
414  if (cif::VERBOSE > 0)
415  {
416  std::cout << std::string(indentLevel * 2, ' ') << "Range " << m_first << ':' << m_last << std::endl;
417  dump_selection(residues, indentLevel);
418  }
419  }
420 
421  int m_first, m_last;
422 };
423 
425 {
426  tls_selection_range_id(int first, int last, char icodeFirst = 0, char icodeLast = 0)
427  : m_first(first)
428  , m_last(last)
429  , m_icode_first(icodeFirst)
430  , m_icode_last(icodeLast)
431  {
432  }
433 
434  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
435  {
436  // need to do this per chain
437  std::set<std::string> chains;
438  for (auto &r : residues)
439  chains.insert(r.chainID);
440 
441  for (std::string chain : chains)
442  {
443  auto f = find_if(residues.begin(), residues.end(),
444  [this,chain](auto r) -> bool
445  {
446  return r.chainID == chain and r.seqNr == m_first and r.iCode == m_icode_first;
447  });
448 
449  auto l = find_if(residues.begin(), residues.end(),
450  [this,chain](auto r) -> bool
451  {
452  return r.chainID == chain and r.seqNr == m_last and r.iCode == m_icode_last;
453  });
454 
455  if (f != residues.end() and l != residues.end() and f <= l)
456  {
457  ++l;
458 
459  for (; f != l; ++f)
460  f->selected = true;
461  }
462  }
463 
464  if (cif::VERBOSE > 0)
465  {
466  std::cout << std::string(indentLevel * 2, ' ') << "Through " << m_first << ':' << m_last << std::endl;
467  dump_selection(residues, indentLevel);
468  }
469  }
470 
471  int m_first, m_last;
472  char m_icode_first, m_icode_last;
473 };
474 
475 struct tls_selection_union : public tls_selection
476 {
477  tls_selection_union(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &rhs)
478  : lhs(lhs.release())
479  , rhs(rhs.release())
480  {
481  }
482 
483  tls_selection_union(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &&rhs)
484  : lhs(lhs.release())
485  , rhs(rhs.release())
486  {
487  }
488 
489  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
490  {
491  auto a = residues;
492  for_each(a.begin(), a.end(), [](auto &r)
493  { r.selected = false; });
494 
495  auto b = residues;
496  for_each(b.begin(), b.end(), [](auto &r)
497  { r.selected = false; });
498 
499  lhs->collect_residues(db, a, indentLevel + 1);
500  rhs->collect_residues(db, b, indentLevel + 1);
501 
502  for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
503  ri->selected = ai->selected or bi->selected;
504 
505  if (cif::VERBOSE > 0)
506  {
507  std::cout << std::string(indentLevel * 2, ' ') << "Union" << std::endl;
508  dump_selection(residues, indentLevel);
509  }
510  }
511 
512  std::unique_ptr<tls_selection> lhs;
513  std::unique_ptr<tls_selection> rhs;
514 };
515 
516 struct tls_selection_intersection : public tls_selection
517 {
518  tls_selection_intersection(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &rhs)
519  : lhs(lhs.release())
520  , rhs(rhs.release())
521  {
522  }
523 
524  tls_selection_intersection(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &&rhs)
525  : lhs(lhs.release())
526  , rhs(rhs.release())
527  {
528  }
529 
530  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
531  {
532  auto a = residues;
533  for_each(a.begin(), a.end(), [](auto &r)
534  { r.selected = false; });
535 
536  auto b = residues;
537  for_each(b.begin(), b.end(), [](auto &r)
538  { r.selected = false; });
539 
540  lhs->collect_residues(db, a, indentLevel + 1);
541  rhs->collect_residues(db, b, indentLevel + 1);
542 
543  for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
544  ri->selected = ai->selected and bi->selected;
545 
546  if (cif::VERBOSE > 0)
547  {
548  std::cout << std::string(indentLevel * 2, ' ') << "Intersection" << std::endl;
549  dump_selection(residues, indentLevel);
550  }
551  }
552 
553  std::unique_ptr<tls_selection> lhs;
554  std::unique_ptr<tls_selection> rhs;
555 };
556 
558 {
559  public:
560  tls_selection_by_name(const std::string &resname)
561  : m_name(resname)
562  {
563  }
564 
565  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
566  {
567  for (auto &r : residues)
568  r.selected = r.name == m_name;
569 
570  if (cif::VERBOSE > 0)
571  {
572  std::cout << std::string(indentLevel * 2, ' ') << "Name " << m_name << std::endl;
573  dump_selection(residues, indentLevel);
574  }
575  }
576 
577  std::string m_name;
578 };
579 
581 {
582  public:
583  tls_selection_by_element(const std::string &element)
584  : m_element(element)
585  {
586  }
587 
588  void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
589  {
590  // rationale... We want to select residues only. So we select
591  // residues that have just a single atom of type m_element.
592  // And we assume these have as residue name... m_element.
593  // ... Right?
594 
595  for (auto &r : residues)
596  r.selected = iequals(r.name, m_element);
597 
598  if (cif::VERBOSE > 0)
599  {
600  std::cout << std::string(indentLevel * 2, ' ') << "Element " << m_element << std::endl;
601  dump_selection(residues, indentLevel);
602  }
603  }
604 
605  std::string m_element;
606 };
607 
608 // --------------------------------------------------------------------
609 
611 {
612  public:
613  tls_selection_parser_impl(const std::string &selection)
614  : m_selection(selection)
615  , m_p(m_selection.begin())
616  , m_end(m_selection.end())
617  {
618  }
619 
620  virtual std::unique_ptr<tls_selection> Parse() = 0;
621 
622  protected:
623  virtual int get_next_token() = 0;
624  virtual void match(int token);
625  virtual std::string to_string(int token) = 0;
626 
627  std::string m_selection;
628  std::string::iterator m_p, m_end;
630  std::string m_token;
631 };
632 
634 {
635  if (m_lookahead == token)
636  m_lookahead = get_next_token();
637  else
638  {
639  std::string expected;
640  if (token >= 256)
641  expected = to_string(token);
642  else
643  expected = { char(token) };
644 
645  std::string found;
646  if (m_lookahead >= 256)
647  found = to_string(m_lookahead) + " (" + m_token + ')';
648  else
649  found = { char(m_lookahead) };
650 
651  throw std::runtime_error("Expected " + expected + " but found " + found);
652  }
653 }
654 
655 // --------------------------------------------------------------------
656 
658 {
659  public:
660  TLSSelectionParserImplPhenix(const std::string &selection)
661  : tls_selection_parser_impl(selection)
662  {
663  m_lookahead = get_next_token();
664  }
665 
666  virtual std::unique_ptr<tls_selection> Parse();
667 
668  private:
669  std::unique_ptr<tls_selection> ParseAtomSelection();
670  std::unique_ptr<tls_selection> ParseTerm();
671  std::unique_ptr<tls_selection> ParseFactor();
672 
673  enum TOKEN
674  {
675  pt_NONE = 0,
676  pt_IDENT = 256,
677  pt_STRING,
678  pt_NUMBER,
679  pt_RESID,
680  pt_EOLN,
681  pt_KW_ALL,
682  pt_KW_CHAIN,
683  pt_KW_RESSEQ,
684  pt_KW_RESID,
685  pt_KW_ICODE,
686  pt_KW_RESNAME,
687  pt_KW_ELEMENT,
688  pt_KW_AND,
689  pt_KW_OR,
690  pt_KW_NOT,
691  pt_KW_PDB,
692  pt_KW_ENTRY,
693  pt_KW_THROUGH
694  };
695 
696  virtual int get_next_token();
697  virtual std::string to_string(int token);
698 
699  int m_value_i;
700  std::string m_value_s;
701  char m_icode;
702 };
703 
704 int TLSSelectionParserImplPhenix::get_next_token()
705 {
706  int result = pt_NONE;
707  enum STATE
708  {
709  st_START,
710  st_RESID = 200,
711  st_NUM = 300,
712  st_IDENT = 400,
713  st_QUOTED = 500,
714  st_DQUOTED = 550,
715  st_OTHER = 600
716  };
717  int state = st_START;
718 
719  m_value_i = 0;
720  m_icode = 0;
721  m_value_s.clear();
722  auto s = m_p;
723 
724  auto start = state;
725  m_token.clear();
726 
727  auto restart = [&]()
728  {
729  switch (start)
730  {
731  case st_START: state = start = st_RESID; break;
732  case st_RESID: state = start = st_NUM; break;
733  case st_NUM: state = start = st_IDENT; break;
734  case st_IDENT: state = start = st_QUOTED; break;
735  case st_QUOTED: state = start = st_DQUOTED; break;
736  case st_DQUOTED: state = start = st_OTHER; break;
737  }
738  m_token.clear();
739  m_p = s;
740  };
741 
742  auto retract = [&]()
743  {
744  --m_p;
745  m_token.pop_back();
746  };
747 
748  while (result == pt_NONE)
749  {
750  char ch = *m_p++;
751  if (m_p > m_end)
752  ch = 0;
753  else
754  m_token += ch;
755 
756  switch (state)
757  {
758  // start block
759  case st_START:
760  if (ch == 0)
761  result = pt_EOLN;
762  else if (isspace(ch))
763  {
764  m_token.clear();
765  ++s;
766  }
767  else
768  restart();
769  break;
770 
771  // RESID block
772  case st_RESID:
773  if (ch == '-')
774  state = st_RESID + 1;
775  else if (isdigit(ch))
776  {
777  m_value_i = (ch - '0');
778  state = st_RESID + 2;
779  }
780  else
781  restart();
782  break;
783 
784  case st_RESID + 1:
785  if (isdigit(ch))
786  {
787  m_value_i = -(ch - '0');
788  state = st_RESID + 2;
789  }
790  else
791  restart();
792  break;
793 
794  case st_RESID + 2:
795  if (isdigit(ch))
796  m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
797  else if (isalpha(ch))
798  {
799  m_icode = ch;
800  state = st_RESID + 3;
801  }
802  else
803  restart();
804  break;
805 
806  case st_RESID + 3:
807  if (isalnum(ch))
808  restart();
809  else
810  {
811  retract();
812  result = pt_RESID;
813  }
814  break;
815 
816  // NUM block
817 
818  case st_NUM:
819  if (ch == '-')
820  state = st_NUM + 1;
821  else if (isdigit(ch))
822  {
823  m_value_i = ch - '0';
824  state = st_NUM + 2;
825  }
826  else
827  restart();
828  break;
829 
830  case st_NUM + 1:
831  if (isdigit(ch))
832  {
833  m_value_i = -(ch - '0');
834  state = st_NUM + 2;
835  }
836  else
837  restart();
838  break;
839 
840  case st_NUM + 2:
841  if (isdigit(ch))
842  m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
843  else if (not isalpha(ch))
844  {
845  result = pt_NUMBER;
846  retract();
847  }
848  else
849  restart();
850  break;
851 
852  // IDENT block
853 
854  case st_IDENT:
855  if (isalnum(ch))
856  {
857  m_value_s = { ch };
858  state = st_IDENT + 1;
859  }
860  else
861  restart();
862  break;
863 
864  case st_IDENT + 1:
865  if (isalnum(ch) or ch == '\'')
866  m_value_s += ch;
867  else
868  {
869  --m_p;
870  result = pt_IDENT;
871  }
872  break;
873 
874  // QUOTED block
875 
876  case st_QUOTED:
877  if (ch == '\'')
878  {
879  m_value_s.clear();
880  state = st_QUOTED + 1;
881  }
882  else
883  restart();
884  break;
885 
886  case st_QUOTED + 1:
887  if (ch == '\'')
888  result = pt_STRING;
889  else if (ch == 0)
890  throw std::runtime_error("Unexpected end of selection, missing quote character?");
891  else
892  m_value_s += ch;
893  break;
894 
895  // QUOTED block
896 
897  case st_DQUOTED:
898  if (ch == '\"')
899  {
900  m_value_s.clear();
901  state = st_DQUOTED + 1;
902  }
903  else
904  restart();
905  break;
906 
907  case st_DQUOTED + 1:
908  if (ch == '\"')
909  result = pt_STRING;
910  else if (ch == 0)
911  throw std::runtime_error("Unexpected end of selection, missing quote character?");
912  else
913  m_value_s += ch;
914  break;
915 
916  // OTHER block
917  case st_OTHER:
918  result = ch;
919  break;
920  }
921  }
922 
923  if (result == pt_IDENT)
924  {
925  if (iequals(m_value_s, "CHAIN"))
926  result = pt_KW_CHAIN;
927  else if (iequals(m_value_s, "ALL"))
928  result = pt_KW_ALL;
929  else if (iequals(m_value_s, "AND"))
930  result = pt_KW_AND;
931  else if (iequals(m_value_s, "OR"))
932  result = pt_KW_OR;
933  else if (iequals(m_value_s, "NOT"))
934  result = pt_KW_NOT;
935  else if (iequals(m_value_s, "RESSEQ"))
936  result = pt_KW_RESSEQ;
937  else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI"))
938  result = pt_KW_RESID;
939  else if (iequals(m_value_s, "RESNAME"))
940  result = pt_KW_RESNAME;
941  else if (iequals(m_value_s, "ELEMENT"))
942  result = pt_KW_ELEMENT;
943  else if (iequals(m_value_s, "PDB"))
944  result = pt_KW_PDB;
945  else if (iequals(m_value_s, "ENTRY"))
946  result = pt_KW_ENTRY;
947  else if (iequals(m_value_s, "THROUGH"))
948  result = pt_KW_THROUGH;
949  }
950 
951  return result;
952 }
953 
954 std::string TLSSelectionParserImplPhenix::to_string(int token)
955 {
956  switch (token)
957  {
958  case pt_IDENT: return "identifier";
959  case pt_STRING: return "std::string";
960  case pt_NUMBER: return "number";
961  case pt_RESID: return "resid";
962  case pt_EOLN: return "end of line";
963 
964  case pt_KW_ALL: return "ALL";
965  case pt_KW_CHAIN: return "CHAIN";
966  case pt_KW_RESSEQ: return "RESSEQ";
967  case pt_KW_RESID: return "RESID";
968  case pt_KW_RESNAME: return "RESNAME";
969  case pt_KW_ELEMENT: return "ELEMENT";
970  case pt_KW_AND: return "AND";
971  case pt_KW_OR: return "OR";
972  case pt_KW_NOT: return "NOT";
973  case pt_KW_PDB: return "PDB";
974  case pt_KW_ENTRY: return "ENTRY";
975  case pt_KW_THROUGH: return "THROUGH";
976 
977  default: return "character";
978  }
979 }
980 
981 std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::Parse()
982 {
983  if (m_lookahead == pt_KW_PDB)
984  {
985  match(pt_KW_PDB);
986  // Match(pt_KW_ENTRY);
987 
988  throw std::runtime_error("Unimplemented PDB ENTRY specification");
989  }
990 
991  std::unique_ptr<tls_selection> result = ParseAtomSelection();
992 
993  bool extraParenthesis = false;
994 
995  if (m_lookahead == ')')
996  {
997  extraParenthesis = true;
998  m_lookahead = get_next_token();
999  }
1000 
1001  match(pt_EOLN);
1002 
1003  if (extraParenthesis)
1004  std::cerr << "WARNING: too many closing parenthesis in TLS selection statement" << std::endl;
1005 
1006  return result;
1007 }
1008 
1009 std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseAtomSelection()
1010 {
1011  std::unique_ptr<tls_selection> result = ParseTerm();
1012 
1013  while (m_lookahead == pt_KW_OR)
1014  {
1015  match(pt_KW_OR);
1016  result.reset(new tls_selection_union(result, ParseTerm()));
1017  }
1018 
1019  return result;
1020 }
1021 
1022 std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseTerm()
1023 {
1024  std::unique_ptr<tls_selection> result = ParseFactor();
1025 
1026  while (m_lookahead == pt_KW_AND)
1027  {
1028  match(pt_KW_AND);
1029  result.reset(new tls_selection_intersection(result, ParseFactor()));
1030  }
1031 
1032  return result;
1033 }
1034 
1035 std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseFactor()
1036 {
1037  std::unique_ptr<tls_selection> result;
1038 
1039  switch (m_lookahead)
1040  {
1041  case '(':
1042  match('(');
1043  result = ParseAtomSelection();
1044  if (m_lookahead == pt_EOLN)
1045  std::cerr << "WARNING: missing closing parenthesis in TLS selection statement" << std::endl;
1046  else
1047  match(')');
1048  break;
1049 
1050  case pt_KW_NOT:
1051  match(pt_KW_NOT);
1052  result.reset(new tls_selection_not(ParseAtomSelection()));
1053  break;
1054 
1055  case pt_KW_CHAIN:
1056  {
1057  match(pt_KW_CHAIN);
1058 
1059  std::string chainID = m_value_s;
1060  if (m_lookahead == pt_NUMBER) // sigh
1061  {
1062  chainID = to_string(m_value_i);
1063  match(pt_NUMBER);
1064  }
1065  else
1066  match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
1067 
1068  result.reset(new tls_selection_chain(chainID));
1069  break;
1070  }
1071 
1072  case pt_KW_RESNAME:
1073  {
1074  match(pt_KW_RESNAME);
1075  std::string name = m_value_s;
1076  match(pt_IDENT);
1077  result.reset(new tls_selection_by_name(name));
1078  break;
1079  }
1080 
1081  case pt_KW_ELEMENT:
1082  {
1083  match(pt_KW_ELEMENT);
1084  std::string element = m_value_s;
1085  match(pt_IDENT);
1086  result.reset(new tls_selection_by_element(element));
1087  break;
1088  }
1089 
1090  case pt_KW_RESSEQ:
1091  {
1092  match(pt_KW_RESSEQ);
1093 
1094  int from = m_value_i;
1095  match(pt_NUMBER);
1096 
1097  int to = from;
1098  if (m_lookahead == ':')
1099  {
1100  match(':');
1101  to = m_value_i;
1102  match(pt_NUMBER);
1103  }
1104 
1105  result.reset(new tls_selection_range_seq(from, to));
1106  break;
1107  }
1108 
1109  case pt_KW_RESID:
1110  {
1111  match(pt_KW_RESID);
1112 
1113  int from, to;
1114  char icode_from = 0, icode_to = 0;
1115  bool through = false;
1116 
1117  from = to = m_value_i;
1118 
1119  if (m_lookahead == pt_NUMBER)
1120  match(pt_NUMBER);
1121  else
1122  {
1123  icode_from = m_icode;
1124  match(pt_RESID);
1125  }
1126 
1127  if (m_lookahead == ':' or m_lookahead == pt_KW_THROUGH or m_lookahead == '-')
1128  {
1129  through = m_lookahead == pt_KW_THROUGH;
1130 
1131  match(m_lookahead);
1132 
1133  to = m_value_i;
1134  if (m_lookahead == pt_NUMBER)
1135  match(pt_NUMBER);
1136  else
1137  {
1138  icode_to = m_icode;
1139  match(pt_RESID);
1140  }
1141 
1142  if (through)
1143  result.reset(new tls_selection_range_id(from, to, icode_from, icode_to));
1144  else
1145  {
1146  if (cif::VERBOSE and (icode_from or icode_to))
1147  std::cerr << "Warning, ignoring insertion codes" << std::endl;
1148 
1149  result.reset(new tls_selection_range_seq(from, to));
1150  }
1151  }
1152  else
1153  result.reset(new tls_selection_res_id(from, icode_from));
1154 
1155  break;
1156  }
1157 
1158  case pt_KW_ALL:
1159  match(pt_KW_ALL);
1160  result.reset(new tls_selection_all());
1161  break;
1162 
1163  default:
1164  throw std::runtime_error("Unexpected token " + to_string(m_lookahead) + " (" + m_token + ')');
1165  }
1166 
1167  return result;
1168 }
1169 
1170 // --------------------------------------------------------------------
1171 
1173 {
1174  public:
1175  TLSSelectionParserImplBuster(const std::string &selection);
1176 
1177  virtual std::unique_ptr<tls_selection> Parse();
1178 
1179  protected:
1180  enum TOKEN
1181  {
1182  bt_NONE = 0,
1183  bt_IDENT = 256,
1186  };
1187 
1188  virtual int get_next_token();
1189  virtual std::string to_string(int token);
1190 
1191  std::unique_ptr<tls_selection> ParseGroup();
1192  std::tuple<std::string, int> ParseAtom();
1193 
1194  std::unique_ptr<tls_selection> ParseOldGroup();
1195 
1197  std::string m_value_s;
1198  bool m_parsing_old_style = false;
1199 };
1200 
1202  : tls_selection_parser_impl(selection)
1203 {
1205 }
1206 
1208 {
1209  int result = bt_NONE;
1210  enum STATE
1211  {
1212  st_START,
1213  st_NEGATE,
1214  st_NUM,
1215  st_IDENT
1216  } state = st_START;
1217 
1218  m_value_i = 0;
1219  m_value_s.clear();
1220  bool negative = false;
1221 
1222  while (result == bt_NONE)
1223  {
1224  char ch = *m_p++;
1225  if (m_p > m_end)
1226  ch = 0;
1227 
1228  switch (state)
1229  {
1230  case st_START:
1231  if (ch == 0)
1232  result = bt_EOLN;
1233  else if (isspace(ch))
1234  continue;
1235  else if (isdigit(ch))
1236  {
1237  m_value_i = ch - '0';
1238  state = st_NUM;
1239  }
1240  else if (isalpha(ch))
1241  {
1242  m_value_s = { ch };
1243  state = st_IDENT;
1244  }
1245  else if (ch == '-')
1246  {
1247  state = st_NEGATE;
1248  }
1249  else
1250  result = ch;
1251  break;
1252 
1253  case st_NEGATE:
1254  if (isdigit(ch))
1255  {
1256  m_value_i = ch - '0';
1257  state = st_NUM;
1258  negative = true;
1259  }
1260  else
1261  {
1262  --m_p;
1263  result = '-';
1264  }
1265  break;
1266 
1267  case st_NUM:
1268  if (isdigit(ch))
1269  m_value_i = 10 * m_value_i + (ch - '0');
1270  else
1271  {
1272  if (negative)
1273  m_value_i = -m_value_i;
1274 
1275  result = bt_NUMBER;
1276  --m_p;
1277  }
1278  break;
1279 
1280  case st_IDENT:
1281  if (isalnum(ch))
1282  m_value_s += ch;
1283  else
1284  {
1285  --m_p;
1286  result = bt_IDENT;
1287  }
1288  break;
1289  }
1290  }
1291 
1292  return result;
1293 }
1294 
1296 {
1297  switch (token)
1298  {
1299  case bt_IDENT: return "identifier (" + m_value_s + ')';
1300  case bt_NUMBER: return "number (" + to_string(m_value_i) + ')';
1301  case bt_EOLN: return "end of line";
1302 
1303  default:
1304  assert(false);
1305  return "unknown token";
1306  }
1307 }
1308 
1309 std::unique_ptr<tls_selection> TLSSelectionParserImplBuster::ParseGroup()
1310 {
1311  std::unique_ptr<tls_selection> result;
1312 
1313  auto add = [&result](const std::string &chainID, int from, int to)
1314  {
1315  std::unique_ptr<tls_selection> sc(new tls_selection_chain(chainID));
1316  std::unique_ptr<tls_selection> sr(new tls_selection_range_seq(from, to));
1317  std::unique_ptr<tls_selection> s(new tls_selection_intersection(sc, sr));
1318 
1319  if (result == nullptr)
1320  result.reset(s.release());
1321  else
1322  result.reset(new tls_selection_union{ result, s });
1323  };
1324 
1325  match('{');
1326 
1327  do
1328  {
1329  std::string chain1;
1330  int seqNr1;
1331  std::tie(chain1, seqNr1) = ParseAtom();
1332 
1333  if (m_lookahead == '-')
1334  {
1335  std::string chain2;
1336  int seqNr2 = seqNr1;
1337 
1338  match('-');
1339 
1340  if (m_lookahead == bt_NUMBER)
1341  {
1342  seqNr2 = m_value_i;
1343  match(bt_NUMBER);
1344  }
1345  else
1346  {
1347  std::tie(chain2, seqNr2) = ParseAtom();
1348  if (chain1 != chain2)
1349  {
1350  std::cerr << "Warning, ranges over multiple chains detected" << std::endl;
1351 
1352  std::unique_ptr<tls_selection> sc1(new tls_selection_chain(chain1));
1353  std::unique_ptr<tls_selection> sr1(new tls_selection_range_seq(seqNr1, kResidueNrWildcard));
1354  std::unique_ptr<tls_selection> s1(new tls_selection_intersection(sc1, sr1));
1355 
1356  std::unique_ptr<tls_selection> sc2(new tls_selection_chain(chain2));
1357  std::unique_ptr<tls_selection> sr2(new tls_selection_range_seq(kResidueNrWildcard, seqNr2));
1358  std::unique_ptr<tls_selection> s2(new tls_selection_intersection(sc2, sr2));
1359 
1360  std::unique_ptr<tls_selection> s(new tls_selection_union(s1, s2));
1361 
1362  if (result == nullptr)
1363  result.reset(s.release());
1364  else
1365  result.reset(new tls_selection_union{ result, s });
1366 
1367  chain1.clear();
1368  }
1369  }
1370 
1371  if (not chain1.empty())
1372  add(chain1, seqNr1, seqNr2);
1373  }
1374  else
1375  add(chain1, seqNr1, seqNr1);
1376  } while (m_lookahead != '}');
1377 
1378  match('}');
1379 
1380  return result;
1381 }
1382 
1383 std::tuple<std::string, int> TLSSelectionParserImplBuster::ParseAtom()
1384 {
1385  std::string chain = m_value_s;
1386  int seqNr = kResidueNrWildcard;
1387 
1388  if (m_lookahead == '*')
1389  match('*');
1390  else
1391  match(bt_IDENT);
1392 
1393  match('|');
1394 
1395  if (m_lookahead == '*')
1396  match('*');
1397  else
1398  {
1399  seqNr = m_value_i;
1400  match(bt_NUMBER);
1401 
1402  if (m_lookahead == ':')
1403  {
1404  match(':');
1405  std::string atom = m_value_s;
1406 
1407  if (cif::VERBOSE > 0)
1408  std::cerr << "Warning: ignoring atom ID '" << atom << "' in TLS selection" << std::endl;
1409 
1410  match(bt_IDENT);
1411  }
1412  }
1413 
1414  return std::make_tuple(chain, seqNr);
1415 }
1416 
1417 std::unique_ptr<tls_selection> TLSSelectionParserImplBuster::Parse()
1418 {
1419  std::unique_ptr<tls_selection> result = ParseGroup();
1420  match(bt_EOLN);
1421  return result;
1422 }
1423 
1424 // --------------------------------------------------------------------
1425 
1427 {
1428  public:
1429  TLSSelectionParserImplBusterOld(const std::string &selection)
1430  : tls_selection_parser_impl(selection)
1431  {
1433  }
1434 
1435  virtual std::unique_ptr<tls_selection> Parse();
1436 
1437  private:
1438  std::unique_ptr<tls_selection> ParseAtomSelection();
1439  std::unique_ptr<tls_selection> ParseTerm();
1440  std::unique_ptr<tls_selection> ParseFactor();
1441 
1442  std::unique_ptr<tls_selection> ParseResid();
1443  std::unique_ptr<tls_selection> ParseChainResid();
1444 
1445  enum TOKEN
1446  {
1447  pt_NONE = 0,
1448  pt_IDENT = 256,
1449  pt_CHAINRESID,
1450  pt_STRING,
1451  pt_NUMBER,
1452  pt_RANGE,
1453  pt_EOLN,
1454 
1455  pt_KW_ALL,
1456  pt_KW_CHAIN,
1457  pt_KW_RESSEQ,
1458  pt_KW_RESID,
1459  pt_KW_RESNAME,
1460  pt_KW_ELEMENT,
1461  pt_KW_AND,
1462  pt_KW_OR,
1463  pt_KW_NOT,
1464  pt_KW_PDB,
1465  pt_KW_ENTRY,
1466  pt_KW_THROUGH
1467  };
1468 
1469  virtual int get_next_token();
1470  virtual std::string to_string(int token);
1471 
1472  int m_value_i;
1473  std::string m_value_s;
1474  int m_value_r[2];
1475 };
1476 
1477 int TLSSelectionParserImplBusterOld::get_next_token()
1478 {
1479  int result = pt_NONE;
1480  enum STATE
1481  {
1482  st_START,
1483  st_NEGATE,
1484  st_NUM,
1485  st_RANGE,
1486  st_IDENT_1,
1487  st_IDENT,
1488  st_CHAINRESID,
1489  st_QUOTED_1,
1490  st_QUOTED_2
1491  } state = st_START;
1492 
1493  m_value_i = 0;
1494  m_value_s.clear();
1495 
1496  bool negative = false;
1497 
1498  while (result == pt_NONE)
1499  {
1500  char ch = *m_p++;
1501  if (m_p > m_end)
1502  ch = 0;
1503 
1504  switch (state)
1505  {
1506  case st_START:
1507  if (ch == 0)
1508  result = pt_EOLN;
1509  else if (isspace(ch))
1510  continue;
1511  else if (isdigit(ch))
1512  {
1513  m_value_i = ch - '0';
1514  state = st_NUM;
1515  }
1516  else if (isalpha(ch))
1517  {
1518  m_value_s = { ch };
1519  state = st_IDENT_1;
1520  }
1521  else if (ch == '-')
1522  {
1523  state = st_NEGATE;
1524  }
1525  else if (ch == '\'')
1526  {
1527  state = st_QUOTED_1;
1528  }
1529  else
1530  result = ch;
1531  break;
1532 
1533  case st_NEGATE:
1534  if (isdigit(ch))
1535  {
1536  m_value_i = ch - '0';
1537  state = st_NUM;
1538  negative = true;
1539  }
1540  else
1541  {
1542  --m_p;
1543  result = '-';
1544  }
1545  break;
1546 
1547  case st_NUM:
1548  if (isdigit(ch))
1549  m_value_i = 10 * m_value_i + (ch - '0');
1550  else if (ch == '-' or ch == ':')
1551  {
1552  if (negative)
1553  m_value_i = -m_value_i;
1554 
1555  m_value_r[0] = m_value_i;
1556  m_value_r[1] = 0;
1557  state = st_RANGE;
1558  }
1559  else
1560  {
1561  if (negative)
1562  m_value_i = -m_value_i;
1563 
1564  result = pt_NUMBER;
1565  --m_p;
1566  }
1567  break;
1568 
1569  case st_RANGE: // TODO: question, is "-2--1" a valid range? We do not support that, yet
1570  if (isdigit(ch))
1571  m_value_r[1] = 10 * m_value_r[1] + (ch - '0');
1572  else if (m_value_r[1] != 0)
1573  {
1574  result = pt_RANGE;
1575  --m_p;
1576  }
1577  else
1578  {
1579  --m_p;
1580  --m_p;
1581  result = pt_NUMBER;
1582  }
1583  break;
1584 
1585  case st_IDENT_1:
1586  if (isalpha(ch))
1587  {
1588  m_value_s += ch;
1589  state = st_IDENT;
1590  }
1591  else if (isdigit(ch))
1592  {
1593  m_value_i = (ch - '0');
1594  state = st_CHAINRESID;
1595  }
1596  else
1597  {
1598  --m_p;
1599  result = pt_IDENT;
1600  }
1601  break;
1602 
1603  case st_CHAINRESID:
1604  if (isalpha(ch))
1605  {
1607  m_value_s += ch;
1608  state = st_IDENT;
1609  }
1610  else if (isdigit(ch))
1611  m_value_i = 10 * m_value_i + (ch - '0');
1612  else
1613  {
1614  --m_p;
1615  result = pt_CHAINRESID;
1616  }
1617  break;
1618 
1619  case st_IDENT:
1620  if (isalnum(ch))
1621  m_value_s += ch;
1622  else
1623  {
1624  --m_p;
1625  result = pt_IDENT;
1626  }
1627  break;
1628 
1629  case st_QUOTED_1:
1630  if (ch == '\'')
1631  {
1632  --m_p;
1633  result = '\'';
1634  }
1635  else
1636  {
1637  m_value_s = { ch };
1638  state = st_QUOTED_2;
1639  }
1640  break;
1641 
1642  case st_QUOTED_2:
1643  if (ch == '\'')
1644  result = pt_STRING;
1645  else if (ch == 0)
1646  throw std::runtime_error("Unexpected end of selection, missing quote character?");
1647  else
1648  m_value_s += ch;
1649  break;
1650  }
1651  }
1652 
1653  if (result == pt_IDENT)
1654  {
1655  if (iequals(m_value_s, "CHAIN"))
1656  result = pt_KW_CHAIN;
1657  else if (iequals(m_value_s, "ALL"))
1658  result = pt_KW_ALL;
1659  else if (iequals(m_value_s, "AND"))
1660  result = pt_KW_AND;
1661  else if (iequals(m_value_s, "OR"))
1662  result = pt_KW_OR;
1663  else if (iequals(m_value_s, "NOT"))
1664  result = pt_KW_NOT;
1665  else if (iequals(m_value_s, "RESSEQ"))
1666  result = pt_KW_RESSEQ;
1667  else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI") or iequals(m_value_s, "RESIDUES"))
1668  result = pt_KW_RESID;
1669  else if (iequals(m_value_s, "RESNAME"))
1670  result = pt_KW_RESNAME;
1671  else if (iequals(m_value_s, "PDB"))
1672  result = pt_KW_PDB;
1673  else if (iequals(m_value_s, "ENTRY"))
1674  result = pt_KW_ENTRY;
1675  else if (iequals(m_value_s, "THROUGH"))
1676  result = pt_KW_THROUGH;
1677  }
1678 
1679  return result;
1680 }
1681 
1682 std::string TLSSelectionParserImplBusterOld::to_string(int token)
1683 {
1684  switch (token)
1685  {
1686  case pt_IDENT: return "identifier (" + m_value_s + ')';
1687  case pt_STRING: return "std::string (" + m_value_s + ')';
1688  case pt_NUMBER: return "number (" + to_string(m_value_i) + ')';
1689  case pt_RANGE: return "range (" + to_string(m_value_r[0]) + ':' + to_string(m_value_r[1]) + ')';
1690  case pt_EOLN: return "end of line";
1691 
1692  case pt_KW_ALL: return "ALL";
1693  case pt_KW_CHAIN: return "CHAIN";
1694  case pt_KW_RESSEQ: return "RESSEQ";
1695  case pt_KW_RESID: return "RESID";
1696  case pt_KW_RESNAME: return "RESNAME";
1697  case pt_KW_ELEMENT: return "ELEMENT";
1698  case pt_KW_AND: return "AND";
1699  case pt_KW_OR: return "OR";
1700  case pt_KW_NOT: return "NOT";
1701  case pt_KW_PDB: return "PDB";
1702  case pt_KW_ENTRY: return "ENTRY";
1703  case pt_KW_THROUGH: return "THROUGH";
1704  default:
1705  assert(false);
1706  return "unknown token";
1707  }
1708 }
1709 
1710 std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::Parse()
1711 {
1712  if (m_lookahead == pt_KW_PDB)
1713  {
1714  match(pt_KW_PDB);
1715  // Match(pt_KW_ENTRY);
1716 
1717  throw std::runtime_error("Unimplemented PDB ENTRY specification");
1718  }
1719 
1720  std::unique_ptr<tls_selection> result = ParseAtomSelection();
1721 
1722  match(pt_EOLN);
1723 
1724  return result;
1725 }
1726 
1727 std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseAtomSelection()
1728 {
1729  std::unique_ptr<tls_selection> result = ParseTerm();
1730 
1731  while (m_lookahead == pt_KW_OR)
1732  {
1733  match(pt_KW_OR);
1734  result.reset(new tls_selection_union(result, ParseTerm()));
1735  }
1736 
1737  return result;
1738 }
1739 
1740 std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseTerm()
1741 {
1742  std::unique_ptr<tls_selection> result = ParseFactor();
1743 
1744  while (m_lookahead == pt_KW_AND)
1745  {
1746  match(pt_KW_AND);
1747  result.reset(new tls_selection_intersection(result, ParseFactor()));
1748  }
1749 
1750  return result;
1751 }
1752 
1753 std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseFactor()
1754 {
1755  std::unique_ptr<tls_selection> result;
1756 
1757  switch (m_lookahead)
1758  {
1759  case '(':
1760  match('(');
1761  result = ParseAtomSelection();
1762  match(')');
1763  break;
1764 
1765  case pt_KW_NOT:
1766  match(pt_KW_NOT);
1767  result.reset(new tls_selection_not(ParseAtomSelection()));
1768  break;
1769 
1770  case pt_KW_CHAIN:
1771  {
1772  match(pt_KW_CHAIN);
1773 
1774  std::string chainID = m_value_s;
1775  if (m_lookahead == pt_NUMBER) // sigh
1776  {
1777  chainID = to_string(m_value_i);
1778  match(pt_NUMBER);
1779  }
1780  else
1781  match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
1782 
1783  result.reset(new tls_selection_chain(chainID));
1784  break;
1785  }
1786 
1787  case pt_KW_RESNAME:
1788  {
1789  match(pt_KW_RESNAME);
1790  std::string name = m_value_s;
1791  match(pt_IDENT);
1792  result.reset(new tls_selection_by_name(name));
1793  break;
1794  }
1795 
1796  case pt_KW_RESSEQ:
1797  match(pt_KW_RESSEQ);
1798  result = ParseResid();
1799  break;
1800 
1801  case pt_KW_RESID:
1802  match(pt_KW_RESID);
1803  result = ParseResid();
1804  break;
1805 
1806  case pt_KW_ALL:
1807  match(pt_KW_ALL);
1808  result.reset(new tls_selection_all());
1809  break;
1810 
1811  case pt_CHAINRESID:
1812  result = ParseChainResid();
1813  break;
1814 
1815  default:
1816  throw std::runtime_error("Unexpected token " + to_string(m_lookahead));
1817  }
1818 
1819  return result;
1820 }
1821 
1822 std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseResid()
1823 {
1824  std::unique_ptr<tls_selection> result;
1825 
1826  for (;;)
1827  {
1828  int from, to;
1829 
1830  if (m_lookahead == pt_RANGE)
1831  {
1832  from = m_value_r[0];
1833  to = m_value_r[1];
1834  match(pt_RANGE);
1835  }
1836  else
1837  {
1838  from = m_value_i;
1839  match(pt_NUMBER);
1840 
1841  to = from;
1842  if (m_lookahead == ':' or m_lookahead == '-' or m_lookahead == pt_KW_THROUGH)
1843  {
1844  match(m_lookahead);
1845  to = m_value_i;
1846  match(pt_NUMBER);
1847  }
1848  }
1849 
1850  std::unique_ptr<tls_selection> range(new tls_selection_range_seq(from, to));
1851 
1852  if (result)
1853  result.reset(new tls_selection_union(result, range));
1854  else
1855  result.reset(range.release());
1856 
1857  if (m_lookahead == ',')
1858  {
1859  match(',');
1860  continue;
1861  }
1862 
1863  break;
1864  }
1865 
1866  return result;
1867 }
1868 
1869 std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseChainResid()
1870 {
1871  std::unique_ptr<tls_selection> result;
1872 
1873  for (;;)
1874  {
1875  int from, to;
1876 
1877  from = to = m_value_i;
1878  std::string chainID = m_value_s;
1879 
1880  match(pt_CHAINRESID);
1881 
1882  if (m_lookahead == '-')
1883  {
1884  match(m_lookahead);
1885  to = m_value_i;
1886 
1887  if (m_value_s != chainID)
1888  throw std::runtime_error("Cannot have two different chainIDs in a range selection");
1889 
1890  match(pt_CHAINRESID);
1891  }
1892 
1893  std::unique_ptr<tls_selection> sc(new tls_selection_chain(chainID));
1894  std::unique_ptr<tls_selection> sr(new tls_selection_range_seq(from, to));
1895  std::unique_ptr<tls_selection> range(new tls_selection_intersection(sc, sr));
1896 
1897  if (result)
1898  result.reset(new tls_selection_union(result, range));
1899  else
1900  result.reset(range.release());
1901 
1902  if (m_lookahead == ',')
1903  {
1904  match(',');
1905  continue;
1906  }
1907 
1908  break;
1909  }
1910 
1911  return result;
1912 }
1913 
1914 // --------------------------------------------------------------------
1915 
1917 {
1918  public:
1919  virtual std::unique_ptr<tls_selection> Parse(const std::string &selection) const = 0;
1921 };
1922 
1923 template <typename IMPL>
1925 {
1926  public:
1927  virtual std::unique_ptr<tls_selection> Parse(const std::string &selection) const
1928  {
1929  std::unique_ptr<tls_selection> result;
1930 
1931  try
1932  {
1933  IMPL p(selection);
1934  result = p.Parse();
1935  }
1936  catch (const std::exception &ex)
1937  {
1938  std::cerr << "ParseError: " << ex.what() << std::endl;
1939  }
1940 
1941  return result;
1942  }
1943 };
1944 
1945 // --------------------------------------------------------------------
1946 
1947 std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection)
1948 {
1952 
1953  std::unique_ptr<tls_selection> result;
1954 
1955  if (cif::icontains(program, "buster"))
1956  {
1957  result = buster.Parse(selection);
1958 
1959  if (not result)
1960  {
1961  if (cif::VERBOSE > 0)
1962  std::cerr << "Falling back to old BUSTER" << std::endl;
1963  result = busterOld.Parse(selection);
1964  }
1965 
1966  if (not result)
1967  {
1968  if (cif::VERBOSE > 0)
1969  std::cerr << "Falling back to PHENIX" << std::endl;
1970  result = phenix.Parse(selection);
1971  }
1972  }
1973  else if (cif::icontains(program, "phenix"))
1974  {
1975  result = phenix.Parse(selection);
1976 
1977  if (not result)
1978  {
1979  if (cif::VERBOSE > 0)
1980  std::cerr << "Falling back to BUSTER" << std::endl;
1981  result = buster.Parse(selection);
1982  }
1983 
1984  if (not result)
1985  {
1986  if (cif::VERBOSE > 0)
1987  std::cerr << "Falling back to old BUSTER" << std::endl;
1988  result = busterOld.Parse(selection);
1989  }
1990  }
1991  else
1992  {
1993  if (cif::VERBOSE > 0)
1994  std::cerr << "No known program specified, trying PHENIX" << std::endl;
1995 
1996  result = phenix.Parse(selection);
1997 
1998  if (not result)
1999  {
2000  if (cif::VERBOSE > 0)
2001  std::cerr << "Falling back to BUSTER" << std::endl;
2002  result = buster.Parse(selection);
2003  }
2004 
2005  if (not result)
2006  {
2007  if (cif::VERBOSE > 0)
2008  std::cerr << "Falling back to old BUSTER" << std::endl;
2009  result = busterOld.Parse(selection);
2010  }
2011  }
2012 
2013  return result;
2014 }
2015 
2016 } // namespace cif
std::unique_ptr< tls_selection > lhs
Definition: tls.cpp:553
void min(Image< double > &op1, const Image< double > &op2)
virtual std::unique_ptr< tls_selection > Parse()
Definition: tls.cpp:981
std::string name
Definition: tls.cpp:53
tls_selection_chain(const std::string &chainID)
Definition: tls.cpp:352
virtual std::string to_string(int token)
Definition: tls.cpp:1295
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:434
char iCode
Definition: tls.cpp:52
const int kNoSeqNum
Definition: tls.cpp:41
bool icontains(std::string_view s, std::string_view q)
Definition: text.cpp:143
tls_selection_union(std::unique_ptr< tls_selection > &lhs, std::unique_ptr< tls_selection > &rhs)
Definition: tls.cpp:477
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:565
std::unique_ptr< tls_selection > parse_tls_selection_details(const std::string &program, const std::string &selection)
Definition: tls.cpp:1947
std::string chainID
Definition: tls.cpp:50
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:530
std::string m_chain
Definition: tls.cpp:371
std::string m_name
Definition: tls.cpp:577
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:337
std::tuple< std::string, int > ParseAtom()
Definition: tls.cpp:1383
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:316
bool iequals(std::string_view a, std::string_view b)
Definition: text.cpp:59
#define i
doublereal * d
tls_selection_range_seq(int first, int last)
Definition: tls.cpp:400
glob_log first
tls_selection_range_id(int first, int last, char icodeFirst=0, char icodeLast=0)
Definition: tls.cpp:426
doublereal * b
void dump_selection(const std::vector< tls_residue > &selected, size_t indentLevel)
Definition: tls.cpp:69
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:406
tls_selection_parser_impl(const std::string &selection)
Definition: tls.cpp:613
if(fabs(c[*nmax+ *nmax *c_dim1])==0.e0)
virtual void match(int token)
Definition: tls.cpp:633
double * f
TLSSelectionParserImplPhenix(const std::string &selection)
Definition: tls.cpp:660
TLSSelectionParserImplBusterOld(const std::string &selection)
Definition: tls.cpp:1429
bool selected
Definition: tls.cpp:54
tls_selection_by_element(const std::string &element)
Definition: tls.cpp:583
void max(Image< double > &op1, const Image< double > &op2)
tls_selection_not(std::unique_ptr< tls_selection > selection)
Definition: tls.cpp:311
tls_selection_res_id(int seqNr, char iCode)
Definition: tls.cpp:376
std::unique_ptr< tls_selection > selection
Definition: tls.cpp:330
virtual std::unique_ptr< tls_selection > Parse()
Definition: tls.cpp:1417
std::string::iterator m_end
Definition: tls.cpp:628
std::unique_ptr< tls_selection > rhs
Definition: tls.cpp:513
tls_selection_union(std::unique_ptr< tls_selection > &lhs, std::unique_ptr< tls_selection > &&rhs)
Definition: tls.cpp:483
int VERBOSE
Definition: utilities.cpp:58
tls_selection_intersection(std::unique_ptr< tls_selection > &lhs, std::unique_ptr< tls_selection > &rhs)
Definition: tls.cpp:518
tls_selection_by_name(const std::string &resname)
Definition: tls.cpp:560
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:588
tls_selection_intersection(std::unique_ptr< tls_selection > &lhs, std::unique_ptr< tls_selection > &&rhs)
Definition: tls.cpp:524
std::unique_ptr< tls_selection > ParseGroup()
Definition: tls.cpp:1309
#define j
virtual std::unique_ptr< tls_selection > Parse()
Definition: tls.cpp:1710
std::string::iterator m_p
Definition: tls.cpp:628
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:357
std::unique_ptr< tls_selection > rhs
Definition: tls.cpp:554
std::unique_ptr< tls_selection > lhs
Definition: tls.cpp:512
const int kResidueNrWildcard
Definition: tls.cpp:40
std::string asymID
Definition: tls.cpp:56
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:382
std::string to_string(bond_type bondType)
Definition: compound.cpp:43
virtual std::unique_ptr< tls_selection > Parse(const std::string &selection) const
Definition: tls.cpp:1927
TLSSelectionParserImplBuster(const std::string &selection)
Definition: tls.cpp:1201
bool operator==(const tls_residue &rhs) const
Definition: tls.cpp:59
virtual ~TLSSelectionParserBase()
Definition: tls.cpp:1920
doublereal * a
void collect_residues(cif::datablock &db, std::vector< tls_residue > &residues, size_t indentLevel) const override
Definition: tls.cpp:489