Xmipp  v3.23.11-Nereus
category.cpp
Go to the documentation of this file.
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "cif++/category.hpp"
28 #include "cif++/datablock.hpp"
29 #include "cif++/parser.hpp"
30 #include "cif++/utilities.hpp"
31 
32 #include <numeric>
33 #include <stack>
34 
35 // TODO: Find out what the rules are exactly for linked items, the current implementation
36 // is inconsistent. It all depends whether a link is satified if a field taking part in the
37 // set of linked items is null at one side and not null in the other.
38 
39 namespace cif
40 {
41 
42 const uint32_t kMaxLineLength = 132;
43 
44 // --------------------------------------------------------------------
45 
47 {
48  public:
49  row_comparator(category &cat)
50  : m_category(cat)
51  {
52  auto cv = cat.get_cat_validator();
53 
54  for (auto k : cv->m_keys)
55  {
56  uint16_t ix = cat.add_column(k);
57 
58  auto iv = cv->get_validator_for_item(k);
59  if (iv == nullptr)
60  throw std::runtime_error("Incomplete dictionary, no Item Validator for Key " + k);
61 
62  auto tv = iv->m_type;
63  if (tv == nullptr)
64  throw std::runtime_error("Incomplete dictionary, no type Validator for Item " + k);
65 
66  using namespace std::placeholders;
67 
68  m_comparator.emplace_back(ix, std::bind(&type_validator::compare, tv, _1, _2));
69  }
70  }
71 
72  int operator()(const row *a, const row *b) const
73  {
74  assert(a);
75  assert(b);
76 
77  row_handle rha(m_category, *a);
78  row_handle rhb(m_category, *b);
79 
80  int d = 0;
81  for (auto &c : m_comparator)
82  {
83  uint16_t k;
84  compareFunc f;
85 
86  std::tie(k, f) = c;
87 
88  std::string_view ka = rha[k].text();
89  std::string_view kb = rhb[k].text();
90 
91  d = f(ka, kb);
92 
93  if (d != 0)
94  break;
95  }
96 
97  return d;
98  }
99 
100  int operator()(const row_initializer &a, const row *b) const
101  {
102  assert(b);
103 
104  row_handle rhb(m_category, *b);
105 
106  int d = 0, i = 0;
107  for (auto &c : m_comparator)
108  {
109  uint16_t k;
110  compareFunc f;
111 
112  std::tie(k, f) = c;
113 
114  std::string_view ka = a[i++].value();
115  std::string_view kb = rhb[k].text();
116 
117  d = f(ka, kb);
118 
119  if (d != 0)
120  break;
121  }
122 
123  return d;
124  }
125 
126  private:
127  typedef std::function<int(std::string_view, std::string_view)> compareFunc;
128  typedef std::tuple<uint16_t, compareFunc> key_comparator;
129 
130  std::vector<key_comparator> m_comparator;
131  category &m_category;
132 };
133 
134 // --------------------------------------------------------------------
135 //
136 // class to keep an index on the keys of a category. This is a red/black
137 // tree implementation.
138 
140 {
141  public:
142  category_index(category *cat)
143  : m_category(*cat)
144  , m_row_comparator(m_category)
145  , m_root(nullptr)
146  {
147  reconstruct();
148  }
149 
151  {
152  delete m_root;
153  }
154 
155  row *find(row *k) const;
156  row *find_by_value(row_initializer k) const;
157 
158  void insert(row *r);
159  void erase(row *r);
160 
161  // batch create
162  void reconstruct();
163 
164  // reorder the row's and returns new head and tail
165  std::tuple<row *, row *> reorder()
166  {
167  std::tuple<row *, row *> result = std::make_tuple(nullptr, nullptr);
168 
169  if (m_root != nullptr)
170  {
171  entry *head = find_min(m_root);
172  entry *tail = reorder(m_root);
173 
174  tail->m_row->m_next = nullptr;
175 
176  result = std::make_tuple(head->m_row, tail->m_row);
177  }
178 
179  return result;
180  }
181 
182  size_t size() const;
183  // bool isValid() const;
184 
185  private:
186  struct entry
187  {
188  entry(row *r)
189  : m_row(r)
190  , m_left(nullptr)
191  , m_right(nullptr)
192  , m_red(true)
193  {
194  }
195 
196  ~entry()
197  {
198  delete m_left;
199  delete m_right;
200  }
201 
202  row *m_row;
203  entry *m_left;
204  entry *m_right;
205  bool m_red;
206  };
207 
208  entry *insert(entry *h, row *v);
209  entry *erase(entry *h, row *k);
210 
211  // void validate(entry* h, bool isParentRed, uint32_t blackDepth, uint32_t& minBlack, uint32_t& maxBlack) const;
212 
213  entry *rotateLeft(entry *h)
214  {
215  entry *x = h->m_right;
216  h->m_right = x->m_left;
217  x->m_left = h;
218  x->m_red = h->m_red;
219  h->m_red = true;
220  return x;
221  }
222 
223  entry *rotateRight(entry *h)
224  {
225  entry *x = h->m_left;
226  h->m_left = x->m_right;
227  x->m_right = h;
228  x->m_red = h->m_red;
229  h->m_red = true;
230  return x;
231  }
232 
233  void flipColour(entry *h)
234  {
235  h->m_red = not h->m_red;
236 
237  if (h->m_left != nullptr)
238  h->m_left->m_red = not h->m_left->m_red;
239 
240  if (h->m_right != nullptr)
241  h->m_right->m_red = not h->m_right->m_red;
242  }
243 
244  bool is_red(entry *h) const
245  {
246  return h != nullptr and h->m_red;
247  }
248 
249  entry *move_red_left(entry *h)
250  {
251  flipColour(h);
252 
253  if (h->m_right != nullptr and is_red(h->m_right->m_left))
254  {
255  h->m_right = rotateRight(h->m_right);
256  h = rotateLeft(h);
257  flipColour(h);
258  }
259 
260  return h;
261  }
262 
263  entry *move_red_right(entry *h)
264  {
265  flipColour(h);
266 
267  if (h->m_left != nullptr and is_red(h->m_left->m_left))
268  {
269  h = rotateRight(h);
270  flipColour(h);
271  }
272 
273  return h;
274  }
275 
276  entry *fix_up(entry *h)
277  {
278  if (is_red(h->m_right))
279  h = rotateLeft(h);
280 
281  if (is_red(h->m_left) and is_red(h->m_left->m_left))
282  h = rotateRight(h);
283 
284  if (is_red(h->m_left) and is_red(h->m_right))
285  flipColour(h);
286 
287  return h;
288  }
289 
290  entry *find_min(entry *h)
291  {
292  while (h->m_left != nullptr)
293  h = h->m_left;
294 
295  return h;
296  }
297 
298  entry *erase_min(entry *h)
299  {
300  if (h->m_left == nullptr)
301  {
302  delete h;
303  h = nullptr;
304  }
305  else
306  {
307  if (not is_red(h->m_left) and not is_red(h->m_left->m_left))
308  h = move_red_left(h);
309 
310  h->m_left = erase_min(h->m_left);
311 
312  h = fix_up(h);
313  }
314 
315  return h;
316  }
317 
318  // Fix m_next fields for rows in order of this index
319  entry *reorder(entry *e)
320  {
321  auto result = e;
322 
323  if (e->m_left != nullptr)
324  {
325  auto l = reorder(e->m_left);
326  l->m_row->m_next = e->m_row;
327  }
328 
329  if (e->m_right != nullptr)
330  {
331  auto mr = find_min(e->m_right);
332  e->m_row->m_next = mr->m_row;
333 
334  result = reorder(e->m_right);
335  }
336 
337  return result;
338  }
339 
340  category &m_category;
341  row_comparator m_row_comparator;
342  entry *m_root;
343 };
344 
345 row *category_index::find(row *k) const
346 {
347  const entry *r = m_root;
348  while (r != nullptr)
349  {
350  int d = m_row_comparator(k, r->m_row);
351  if (d < 0)
352  r = r->m_left;
353  else if (d > 0)
354  r = r->m_right;
355  else
356  break;
357  }
358 
359  return r ? r->m_row : nullptr;
360 }
361 
362 row *category_index::find_by_value(row_initializer k) const
363 {
364  // sort the values in k first
365 
366  row_initializer k2;
367  for (auto &f : m_category.key_field_indices())
368  {
369  auto fld = m_category.get_column_name(f);
370 
371  auto ki = find_if(k.begin(), k.end(), [&fld](auto &i) { return i.name() == fld; });
372  if (ki == k.end())
373  k2.emplace_back(fld, "");
374  else
375  k2.emplace_back(*ki);
376  }
377 
378  const entry *r = m_root;
379  while (r != nullptr)
380  {
381  int d = m_row_comparator(k2, r->m_row);
382  if (d < 0)
383  r = r->m_left;
384  else if (d > 0)
385  r = r->m_right;
386  else
387  break;
388  }
389 
390  return r ? r->m_row : nullptr;
391 }
392 
394 {
395  m_root = insert(m_root, k);
396  m_root->m_red = false;
397 }
398 
399 category_index::entry *category_index::insert(entry *h, row *v)
400 {
401  if (h == nullptr)
402  return new entry(v);
403 
404  int d = m_row_comparator(v, h->m_row);
405  if (d < 0)
406  h->m_left = insert(h->m_left, v);
407  else if (d > 0)
408  h->m_right = insert(h->m_right, v);
409  else
410  {
411  row_handle rh(m_category, *v);
412 
413  std::ostringstream os;
414  for (auto col : m_category.key_fields())
415  {
416  if (rh[col])
417  os << col << ": " << std::quoted(rh[col].text()) << "; ";
418  }
419 
420  throw duplicate_key_error("Duplicate Key violation, cat: " + m_category.name() + " values: " + os.str());
421  }
422 
423  if (is_red(h->m_right) and not is_red(h->m_left))
424  h = rotateLeft(h);
425 
426  if (is_red(h->m_left) and is_red(h->m_left->m_left))
427  h = rotateRight(h);
428 
429  if (is_red(h->m_left) and is_red(h->m_right))
430  flipColour(h);
431 
432  return h;
433 }
434 
436 {
437  assert(find(k) == k);
438 
439  m_root = erase(m_root, k);
440  if (m_root != nullptr)
441  m_root->m_red = false;
442 }
443 
444 category_index::entry *category_index::erase(entry *h, row *k)
445 {
446  if (m_row_comparator(k, h->m_row) < 0)
447  {
448  if (h->m_left != nullptr)
449  {
450  if (not is_red(h->m_left) and not is_red(h->m_left->m_left))
451  h = move_red_left(h);
452 
453  h->m_left = erase(h->m_left, k);
454  }
455  }
456  else
457  {
458  if (is_red(h->m_left))
459  h = rotateRight(h);
460 
461  if (m_row_comparator(k, h->m_row) == 0 and h->m_right == nullptr)
462  {
463  delete h;
464  return nullptr;
465  }
466 
467  if (h->m_right != nullptr)
468  {
469  if (not is_red(h->m_right) and not is_red(h->m_right->m_left))
470  h = move_red_right(h);
471 
472  if (m_row_comparator(k, h->m_row) == 0)
473  {
474  h->m_row = find_min(h->m_right)->m_row;
475  h->m_right = erase_min(h->m_right);
476  }
477  else
478  h->m_right = erase(h->m_right, k);
479  }
480  }
481 
482  return fix_up(h);
483 }
484 
486 {
487  delete m_root;
488  m_root = nullptr;
489 
490  for (auto r : m_category)
491  insert(r.get_row());
492 
493  // maybe reconstruction can be done quicker by using the following commented code.
494  // however, I've not had the time to think of a way to set the red/black flag correctly in that case.
495 
496  // std::vector<row*> rows;
497  // transform(mCat.begin(), mCat.end(), backInserter(rows),
498  // [](Row r) -> row* { assert(r.mData); return r.mData; });
499  //
500  // assert(std::find(rows.begin(), rows.end(), nullptr) == rows.end());
501  //
502  // // don't use sort here, it will run out of the stack of something.
503  // // quicksort is notorious for using excessive recursion.
504  // // Besides, most of the time, the data is ordered already anyway.
505  //
506  // stable_sort(rows.begin(), rows.end(), [this](row* a, row* b) -> bool { return this->mComp(a, b) < 0; });
507  //
508  // for (size_t i = 0; i < rows.size() - 1; ++i)
509  // assert(mComp(rows[i], rows[i + 1]) < 0);
510  //
511  // deque<entry*> e;
512  // transform(rows.begin(), rows.end(), back_inserter(e),
513  // [](row* r) -> entry* { return new entry(r); });
514  //
515  // while (e.size() > 1)
516  // {
517  // deque<entry*> ne;
518  //
519  // while (not e.empty())
520  // {
521  // entry* a = e.front();
522  // e.pop_front();
523  //
524  // if (e.empty())
525  // ne.push_back(a);
526  // else
527  // {
528  // entry* b = e.front();
529  // b->mLeft = a;
530  //
531  // assert(mComp(a->mRow, b->mRow) < 0);
532  //
533  // e.pop_front();
534  //
535  // if (not e.empty())
536  // {
537  // entry* c = e.front();
538  // e.pop_front();
539  //
540  // assert(mComp(b->mRow, c->mRow) < 0);
541  //
542  // b->mRight = c;
543  // }
544  //
545  // ne.push_back(b);
546  //
547  // if (not e.empty())
548  // {
549  // ne.push_back(e.front());
550  // e.pop_front();
551  // }
552  // }
553  // }
554  //
555  // swap (e, ne);
556  // }
557  //
558  // assert(e.size() == 1);
559  // mRoot = e.front();
560 }
561 
562 size_t category_index::size() const
563 {
564  std::stack<entry *> s;
565  s.push(m_root);
566 
567  size_t result = 0;
568 
569  while (not s.empty())
570  {
571  entry *e = s.top();
572  s.pop();
573 
574  if (e == nullptr)
575  continue;
576 
577  ++result;
578 
579  s.push(e->m_left);
580  s.push(e->m_right);
581  }
582 
583  return result;
584 }
585 
586 // --------------------------------------------------------------------
587 
588 category::category(std::string_view name)
589  : m_name(name)
590 {
591 }
592 
593 category::category(const category &rhs)
594  : m_name(rhs.m_name)
595  , m_columns(rhs.m_columns)
596  , m_validator(rhs.m_validator)
597  , m_cat_validator(rhs.m_cat_validator)
598  , m_cascade(rhs.m_cascade)
599 {
600  for (auto r = rhs.m_head; r != nullptr; r = r->m_next)
601  insert_impl(end(), clone_row(*r));
602 
603  if (m_cat_validator != nullptr and m_index == nullptr)
604  m_index = new category_index(this);
605 }
606 
607 category::category(category &&rhs)
608  : m_name(std::move(rhs.m_name))
609  , m_columns(std::move(rhs.m_columns))
610  , m_validator(rhs.m_validator)
611  , m_cat_validator(rhs.m_cat_validator)
612  , m_parent_links(std::move(rhs.m_parent_links))
613  , m_child_links(std::move(rhs.m_child_links))
614  , m_cascade(rhs.m_cascade)
615  , m_index(rhs.m_index)
616  , m_head(rhs.m_head)
617  , m_tail(rhs.m_tail)
618 {
619  rhs.m_head = nullptr;
620  rhs.m_tail = nullptr;
621  rhs.m_index = nullptr;
622 }
623 
624 category &category::operator=(const category &rhs)
625 {
626  if (this != &rhs)
627  {
628  if (not empty())
629  clear();
630 
631  m_name = rhs.m_name;
632  m_columns = rhs.m_columns;
633  m_cascade = rhs.m_cascade;
634 
635  m_validator = nullptr;
636  m_cat_validator = nullptr;
637 
638  delete m_index;
639  m_index = nullptr;
640 
641  for (auto r = rhs.m_head; r != nullptr; r = r->m_next)
642  insert_impl(cend(), clone_row(*r));
643 
644  m_validator = rhs.m_validator;
645  m_cat_validator = rhs.m_cat_validator;
646 
647  if (m_cat_validator != nullptr and m_index == nullptr)
648  m_index = new category_index(this);
649  }
650 
651  return *this;
652 }
653 
654 category &category::operator=(category &&rhs)
655 {
656  if (this != &rhs)
657  {
658  m_name = std::move(rhs.m_name);
659  m_columns = std::move(rhs.m_columns);
660  m_cascade = rhs.m_cascade;
661  m_validator = rhs.m_validator;
662  m_cat_validator = rhs.m_cat_validator;
663  m_parent_links = rhs.m_parent_links;
664  m_child_links = rhs.m_child_links;
665 
666  std::swap(m_index, rhs.m_index);
667  std::swap(m_head, rhs.m_head);
668  std::swap(m_tail, rhs.m_tail);
669  }
670 
671  return *this;
672 }
673 
674 category::~category()
675 {
676  clear();
677 }
678 
679 // --------------------------------------------------------------------
680 
681 iset category::get_columns() const
682 {
683  iset result;
684 
685  for (auto &col : m_columns)
686  result.insert(col.m_name);
687 
688  return result;
689 }
690 
691 iset category::key_fields() const
692 {
693  if (m_validator == nullptr)
694  throw std::runtime_error("No Validator specified");
695 
696  if (m_cat_validator == nullptr)
697  m_validator->report_error("undefined Category", true);
698 
699  iset result;
700  for (auto &iv : m_cat_validator->m_item_validators)
701  result.insert(iv.m_tag);
702 
703  return result;
704 }
705 
706 std::set<uint16_t> category::key_field_indices() const
707 {
708  if (m_validator == nullptr)
709  throw std::runtime_error("No Validator specified");
710 
711  if (m_cat_validator == nullptr)
712  m_validator->report_error("undefined Category", true);
713 
714  std::set<uint16_t> result;
715  for (auto &k : m_cat_validator->m_keys)
716  result.insert(get_column_ix(k));
717 
718  return result;
719 }
720 
721 // --------------------------------------------------------------------
722 
723 void category::set_validator(const validator *v, datablock &db)
724 {
725  m_validator = v;
726 
727  if (m_index != nullptr)
728  {
729  delete m_index;
730  m_index = nullptr;
731  }
732 
733  if (m_validator != nullptr)
734  {
735  m_cat_validator = m_validator->get_validator_for_category(m_name);
736 
737  if (m_cat_validator != nullptr)
738  {
739  std::set<std::string> missing;
740 
741  if (not empty())
742  {
743  std::vector<uint16_t> kix;
744  for (auto k : m_cat_validator->m_keys)
745  {
746  kix.push_back(get_column_ix(k));
747  if (kix.back() >= m_columns.size())
748  missing.insert(k);
749  }
750  }
751 
752  if (missing.empty())
753  m_index = new category_index(this);
754  else if (VERBOSE > 0)
755  std::cerr << "Cannot construct index since the key field" << (missing.size() > 1 ? "s" : "") << " "
756  << cif::join(missing, ", ") + " in " + m_name + " " + (missing.size() == 1 ? "is" : "are") << " missing" << std::endl;
757  }
758  }
759  else
760  m_cat_validator = nullptr;
761 
762  for (auto &&[column, cv] : m_columns)
763  cv = m_cat_validator ? m_cat_validator->get_validator_for_item(column) : nullptr;
764 
765  update_links(db);
766 }
767 
768 void category::update_links(datablock &db)
769 {
770  m_child_links.clear();
771  m_parent_links.clear();
772 
773  if (m_validator != nullptr)
774  {
775  for (auto link : m_validator->get_links_for_parent(m_name))
776  {
777  auto childCat = db.get(link->m_child_category);
778  if (childCat == nullptr)
779  continue;
780  m_child_links.emplace_back(childCat, link);
781  }
782 
783  for (auto link : m_validator->get_links_for_child(m_name))
784  {
785  auto parentCat = db.get(link->m_parent_category);
786  if (parentCat == nullptr)
787  continue;
788  m_parent_links.emplace_back(parentCat, link);
789  }
790  }
791 }
792 
793 bool category::is_valid() const
794 {
795  bool result = true;
796 
797  if (m_validator == nullptr)
798  throw std::runtime_error("no Validator specified");
799 
800  if (empty())
801  {
802  if (VERBOSE > 2)
803  std::cerr << "Skipping validation of empty category " << m_name << std::endl;
804  return true;
805  }
806 
807  if (m_cat_validator == nullptr)
808  {
809  m_validator->report_error("undefined category " + m_name, false);
810  return false;
811  }
812 
813  auto mandatory = m_cat_validator->m_mandatory_fields;
814 
815  for (auto &col : m_columns)
816  {
817  auto iv = m_cat_validator->get_validator_for_item(col.m_name);
818  if (iv == nullptr)
819  {
820  m_validator->report_error("Field " + col.m_name + " is not valid in category " + m_name, false);
821  result = false;
822  }
823 
824  // col.m_validator = iv;
825  if (col.m_validator != iv)
826  m_validator->report_error("Column validator is not specified correctly", true);
827 
828  mandatory.erase(col.m_name);
829  }
830 
831  if (not mandatory.empty())
832  {
833  m_validator->report_error("In category " + m_name + " the following mandatory fields are missing: " + join(mandatory, ", "), false);
834  result = false;
835  }
836 
837  if (m_cat_validator->m_keys.empty() == false and m_index == nullptr)
838  {
839  std::set<std::string> missing;
840 
841  for (auto k : m_cat_validator->m_keys)
842  {
843  if (get_column_ix(k) >= m_columns.size())
844  missing.insert(k);
845  }
846 
847  m_validator->report_error("In category " + m_name + " the index is missing, likely due to missing key fields: " + join(missing, ", "), false);
848  result = false;
849  }
850 
851 #if not defined(NDEBUG)
852  // check index?
853  if (m_index)
854  {
855  if (m_index->size() != size())
856  m_validator->report_error("size of index is not equal to size of category " + m_name, true);
857 
858  // m_index->validate();
859  for (auto r : *this)
860  {
861  auto p = r.get_row();
862  if (m_index->find(p) != p)
863  m_validator->report_error("Key not found in index for category " + m_name, true);
864  }
865  }
866 #endif
867 
868  // validate all values
869  mandatory = m_cat_validator->m_mandatory_fields;
870 
871  for (auto ri = m_head; ri != nullptr; ri = ri->m_next)
872  {
873  for (uint16_t cix = 0; cix < m_columns.size(); ++cix)
874  {
875  bool seen = false;
876  auto iv = m_columns[cix].m_validator;
877 
878  if (iv == nullptr)
879  {
880  m_validator->report_error("invalid field " + m_columns[cix].m_name + " for category " + m_name, false);
881  result = false;
882  continue;
883  }
884 
885  auto vi = ri->get(cix);
886  if (vi != nullptr)
887  {
888  seen = true;
889  try
890  {
891  (*iv)(vi->text());
892  }
893  catch (const std::exception &e)
894  {
895  result = false;
896  m_validator->report_error("Error validating " + m_columns[cix].m_name + ": " + e.what(), false);
897  continue;
898  }
899  }
900 
901  if (seen or ri != m_head)
902  continue;
903 
904  if (iv != nullptr and iv->m_mandatory)
905  {
906  m_validator->report_error("missing mandatory field " + m_columns[cix].m_name + " for category " + m_name, false);
907  result = false;
908  }
909  }
910  }
911 
912  return result;
913 }
914 
915 bool category::validate_links() const
916 {
917  if (not m_validator)
918  return false;
919 
920  bool result = true;
921 
922  for (auto &link : m_parent_links)
923  {
924  auto parent = link.linked;
925 
926  if (parent == nullptr)
927  continue;
928 
929  // this particular case should be skipped, that's because it is wrong:
930  // there are atoms that are not part of a polymer, and thus will have no
931  // parent in that category.
932  if (name() == "atom_site" and (parent->name() == "pdbx_poly_seq_scheme" or parent->name() == "entity_poly_seq"))
933  continue;
934 
935  size_t missing = 0;
936  category first_missing_rows(name());
937 
938  for (auto r : *this)
939  {
940  auto cond = get_parents_condition(r, *parent);
941  if (not cond)
942  continue;
943  if (not parent->exists(std::move(cond)))
944  {
945  ++missing;
946  if (VERBOSE and first_missing_rows.size() < 5)
947  first_missing_rows.emplace(r);
948  }
949  }
950 
951  if (missing)
952  {
953  result = false;
954 
955  std::cerr << "Links for " << link.v->m_link_group_label << " are incomplete" << std::endl
956  << " There are " << missing << " items in " << m_name << " that don't have matching parent items in " << parent->m_name << std::endl;
957 
958  if (VERBOSE)
959  {
960  std::cerr << "showing first " << first_missing_rows.size() << " rows" << std::endl
961  << std::endl;
962 
963  first_missing_rows.write(std::cerr, link.v->m_child_keys, false);
964 
965  std::cerr << std::endl;
966  }
967  }
968  }
969 
970  return result;
971 }
972 
973 // --------------------------------------------------------------------
974 
975 row_handle category::operator[](const key_type &key)
976 {
977  row_handle result{};
978 
979  if (not empty())
980  {
981  if (m_index == nullptr)
982  throw std::logic_error("Category " + m_name + " does not have an index");
983 
984  auto row = m_index->find_by_value(key);
985  if (row != nullptr)
986  result = { *this, *row };
987  }
988 
989  return result;
990 }
991 
992 // --------------------------------------------------------------------
993 
994 condition category::get_parents_condition(row_handle rh, const category &parentCat) const
995 {
996  if (m_validator == nullptr or m_cat_validator == nullptr)
997  throw std::runtime_error("No validator known for category " + m_name);
998 
999  condition result;
1000 
1001  for (auto &link : m_validator->get_links_for_child(m_name))
1002  {
1003  if (link->m_parent_category != parentCat.m_name)
1004  continue;
1005 
1006  condition cond;
1007 
1008  for (size_t ix = 0; ix < link->m_child_keys.size(); ++ix)
1009  {
1010  auto childValue = rh[link->m_child_keys[ix]];
1011 
1012  if (childValue.empty())
1013  continue;
1014 
1015  cond = std::move(cond) and key(link->m_parent_keys[ix]) == childValue.text();
1016  }
1017 
1018  result = std::move(result) or std::move(cond);
1019  }
1020 
1021  return result;
1022 }
1023 
1024 condition category::get_children_condition(row_handle rh, const category &childCat) const
1025 {
1026  if (m_validator == nullptr or m_cat_validator == nullptr)
1027  throw std::runtime_error("No validator known for category " + m_name);
1028 
1029  condition result;
1030 
1031  iset mandatoryChildFields;
1032  auto childCatValidator = m_validator->get_validator_for_category(childCat.name());
1033  if (childCatValidator != nullptr)
1034  mandatoryChildFields = childCatValidator->m_mandatory_fields;
1035 
1036  for (auto &link : m_validator->get_links_for_parent(m_name))
1037  {
1038  if (link->m_child_category != childCat.m_name)
1039  continue;
1040 
1041  condition cond;
1042 
1043  for (size_t ix = 0; ix < link->m_parent_keys.size(); ++ix)
1044  {
1045  auto childKey = link->m_child_keys[ix];
1046  auto parentKey = link->m_parent_keys[ix];
1047 
1048  auto parentValue = rh[parentKey];
1049 
1050  if (parentValue.empty())
1051  cond = std::move(cond) and key(childKey) == null;
1052  else if (link->m_parent_keys.size() > 1 and mandatoryChildFields.find(childKey) == mandatoryChildFields.end())
1053  cond = std::move(cond) and (key(childKey) == parentValue.text() or key(childKey) == null);
1054  else
1055  cond = std::move(cond) and key(childKey) == parentValue.text();
1056  }
1057 
1058  result = std::move(result) or std::move(cond);
1059  }
1060 
1061  return result;
1062 }
1063 
1064 bool category::has_children(row_handle r) const
1065 {
1066  bool result = false;
1067 
1068  for (auto &&[childCat, link] : m_child_links)
1069  {
1070  if (not childCat->exists(get_children_condition(r, *childCat)))
1071  continue;
1072 
1073  result = true;
1074  break;
1075  }
1076 
1077  return result;
1078 }
1079 
1080 bool category::has_parents(row_handle r) const
1081 {
1082  bool result = false;
1083 
1084  for (auto &&[parentCat, link] : m_parent_links)
1085  {
1086  if (not parentCat->exists(get_parents_condition(r, *parentCat)))
1087  continue;
1088 
1089  result = true;
1090  break;
1091  }
1092 
1093  return result;
1094 }
1095 
1096 std::vector<row_handle> category::get_children(row_handle r, const category &childCat) const
1097 {
1098  if (m_validator == nullptr or m_cat_validator == nullptr)
1099  throw std::runtime_error("No validator known for category " + m_name);
1100 
1101  std::vector<row_handle> result;
1102 
1103  for (auto child : childCat.find(get_children_condition(r, childCat)))
1104  {
1105  if (std::find(result.begin(), result.end(), child) == result.end())
1106  result.push_back(child);
1107  }
1108 
1109  return result;
1110 }
1111 
1112 std::vector<row_handle> category::get_parents(row_handle r, const category &parentCat) const
1113 {
1114  assert(m_validator != nullptr);
1115  assert(m_cat_validator != nullptr);
1116 
1117  std::vector<row_handle> result;
1118 
1119  for (auto parent : parentCat.find(get_parents_condition(r, parentCat)))
1120  {
1121  if (std::find(result.begin(), result.end(), parent) == result.end())
1122  result.push_back(parent);
1123  }
1124 
1125  return result;
1126 }
1127 
1128 std::vector<row_handle> category::get_linked(row_handle r, const category &cat) const
1129 {
1130  std::vector<row_handle> result = get_children(r, cat);
1131  if (result.empty())
1132  result = get_parents(r, cat);
1133  return result;
1134 }
1135 
1136 // --------------------------------------------------------------------
1137 
1138 category::iterator category::erase(iterator pos)
1139 {
1140  row_handle rh = *pos;
1141  row *r = rh.get_row();
1142  iterator result = ++pos;
1143 
1144  if (m_head == nullptr)
1145  throw std::runtime_error("erase");
1146 
1147  if (m_index != nullptr)
1148  m_index->erase(r);
1149 
1150  if (r == m_head)
1151  {
1152  m_head = m_head->m_next;
1153  r->m_next = nullptr;
1154  }
1155  else
1156  {
1157  for (auto pi = m_head; pi != nullptr; pi = pi->m_next)
1158  {
1159  if (pi->m_next == r)
1160  {
1161  pi->m_next = r->m_next;
1162  r->m_next = nullptr;
1163  break;
1164  }
1165  }
1166  }
1167 
1168  // links are created based on the _pdbx_item_linked_group_list entries
1169  // in mmcif_pdbx.dic dictionary.
1170  //
1171  // For each link group in _pdbx_item_linked_group_list
1172  // a set of keys from one category is mapped to another.
1173  // If all values in a child are the same as the specified parent ones
1174  // the child is removed as well, recursively of course.
1175 
1176  if (m_validator != nullptr)
1177  {
1178  for (auto &&[childCat, link] : m_child_links)
1179  childCat->erase_orphans(get_children_condition(rh, *childCat), *this);
1180  }
1181 
1182  delete_row(r);
1183 
1184  // reset mTail, if needed
1185  if (r == m_tail)
1186  {
1187  m_tail = m_head;
1188  if (m_tail != nullptr)
1189  while (m_tail->m_next != nullptr)
1190  m_tail = m_tail->m_next;
1191  }
1192 
1193  return result;
1194 }
1195 
1196 template<typename T>
1198 {
1199  public:
1200  save_value(T &v, const T nv = {})
1201  : m_v(v)
1202  , m_sv(std::exchange(m_v, nv))
1203  {
1204  }
1205 
1207  {
1208  m_v = m_sv;
1209  }
1210 
1211  private:
1212  T &m_v;
1213  const T m_sv;
1214 };
1215 
1216 size_t category::erase(condition &&cond)
1217 {
1218  return erase(std::move(cond), {});
1219 }
1220 
1221 size_t category::erase(condition &&cond, std::function<void(row_handle)> &&visit)
1222 {
1223  size_t result = 0;
1224 
1225  cond.prepare(*this);
1226 
1227  std::map<category *, condition> potential_orphans;
1228 
1229  auto ri = begin();
1230  while (ri != end())
1231  {
1232  if (cond(*ri))
1233  {
1234  if (visit)
1235  visit(*ri);
1236 
1237  for (auto &&[childCat, link] : m_child_links)
1238  {
1239  auto ccond = get_children_condition(*ri, *childCat);
1240  if (not ccond)
1241  continue;
1242  potential_orphans[childCat] = std::move(potential_orphans[childCat]) or std::move(ccond);
1243  }
1244 
1245  save_value sv(m_validator);
1246 
1247  ri = erase(ri);
1248  ++result;
1249  }
1250  else
1251  ++ri;
1252  }
1253 
1254  for (auto &&[childCat, condition] : potential_orphans)
1255  childCat->erase_orphans(std::move(condition), *this);
1256 
1257  return result;
1258 }
1259 
1260 void category::clear()
1261 {
1262  auto i = m_head;
1263  while (i != nullptr)
1264  {
1265  auto t = i;
1266  i = i->m_next;
1267  delete_row(t);
1268  }
1269 
1270  m_head = m_tail = nullptr;
1271 
1272  delete m_index;
1273  m_index = nullptr;
1274 }
1275 
1276 void category::erase_orphans(condition &&cond, category &parent)
1277 {
1278  std::vector<row *> remove;
1279 
1280  cond.prepare(*this);
1281 
1282  for (auto r : *this)
1283  {
1284  if (not cond(r))
1285  continue;
1286 
1287  if (parent.exists(get_parents_condition(r, parent)))
1288  continue;
1289 
1290  if (VERBOSE > 1)
1291  {
1292  category c(m_name);
1293  c.emplace(r);
1294  std::cerr << "Removing orphaned record: " << std::endl
1295  << c << std::endl
1296  << std::endl;
1297 
1298  }
1299 
1300  remove.emplace_back(r.m_row);
1301  }
1302 
1303  for (auto r : remove)
1304  erase(iterator(*this, r));
1305 }
1306 
1307 std::string category::get_unique_id(std::function<std::string(int)> generator)
1308 {
1309  using namespace cif::literals;
1310 
1311  std::string id_tag = "id";
1312  if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
1313  id_tag = m_cat_validator->m_keys.front();
1314 
1315  // calling size() often is a waste of resources
1316  if (m_last_unique_num == 0)
1317  m_last_unique_num = static_cast<uint32_t>(size());
1318 
1319  for (;;)
1320  {
1321  std::string result = generator(static_cast<int>(m_last_unique_num++));
1322 
1323  if (exists(key(id_tag) == result))
1324  continue;
1325 
1326  return result;
1327  }
1328 }
1329 
1330 void category::update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value)
1331 {
1332  using namespace std::literals;
1333 
1334  if (rows.empty())
1335  return;
1336 
1337  auto colIx = get_column_ix(tag);
1338  if (colIx >= m_columns.size())
1339  throw std::runtime_error("Invalid column " + std::string{ value } + " for " + m_name);
1340 
1341  auto &col = m_columns[colIx];
1342 
1343  // check the value
1344  if (col.m_validator)
1345  (*col.m_validator)(value);
1346 
1347  // first some sanity checks, what was the old value and is it the same for all rows?
1348  std::string oldValue{ rows.front()[tag].text() };
1349  for (auto row : rows)
1350  {
1351  if (oldValue != row[tag].text())
1352  throw std::runtime_error("Inconsistent old values in update_value");
1353  }
1354 
1355  if (oldValue == value) // no need to do anything
1356  return;
1357 
1358  // update rows, but do not cascade
1359  for (auto row : rows)
1360  row.assign(colIx, value, false);
1361 
1362  // see if we need to update any child categories that depend on this value
1363  for (auto parent : rows)
1364  {
1365  for (auto &&[childCat, linked] : m_child_links)
1366  {
1367  if (std::find(linked->m_parent_keys.begin(), linked->m_parent_keys.end(), tag) == linked->m_parent_keys.end())
1368  continue;
1369 
1370  condition cond;
1371  std::string childTag;
1372 
1373  for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
1374  {
1375  std::string pk = linked->m_parent_keys[ix];
1376  std::string ck = linked->m_child_keys[ix];
1377 
1378  if (pk == tag)
1379  {
1380  childTag = ck;
1381  cond = std::move(cond) && key(ck) == oldValue;
1382  }
1383  else
1384  cond = std::move(cond) && key(ck) == parent[pk].text();
1385  }
1386 
1387  auto children = childCat->find(std::move(cond));
1388  if (children.empty())
1389  continue;
1390 
1391  std::vector<row_handle> child_rows;
1392  std::copy(children.begin(), children.end(), std::back_inserter(child_rows));
1393 
1394  // now be careful. If we search back from child to parent and still find a valid parent row
1395  // we cannot simply rename the child but will have to create a new child. Unless that new
1396  // child already exists of course.
1397 
1398  std::vector<row_handle> process;
1399 
1400  for (auto child : child_rows)
1401  {
1402  condition cond_c;
1403 
1404  for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
1405  {
1406  std::string pk = linked->m_parent_keys[ix];
1407  std::string ck = linked->m_child_keys[ix];
1408 
1409  cond_c = std::move(cond_c) && key(pk) == child[ck].text();
1410  }
1411 
1412  auto parents = find(std::move(cond_c));
1413  if (parents.empty())
1414  {
1415  process.push_back(child);
1416  continue;
1417  }
1418 
1419  // oops, we need to split this child, unless a row already exists for the new value
1420  condition check;
1421 
1422  for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
1423  {
1424  std::string pk = linked->m_parent_keys[ix];
1425  std::string ck = linked->m_child_keys[ix];
1426 
1427  if (pk == tag)
1428  check = std::move(check) && key(ck) == value;
1429  else
1430  check = std::move(check) && key(ck) == parent[pk].text();
1431  }
1432 
1433  if (childCat->exists(std::move(check))) // phew..., narrow escape
1434  continue;
1435 
1436  // create the actual copy, if we can...
1437  if (childCat->m_cat_validator != nullptr and childCat->m_cat_validator->m_keys.size() == 1)
1438  {
1439  auto copy = childCat->create_copy(child);
1440  if (copy != child)
1441  {
1442  process.push_back(child);
1443  continue;
1444  }
1445  }
1446 
1447  // cannot update this...
1448  if (cif::VERBOSE > 0)
1449  std::cerr << "Cannot update child " << childCat->m_name << "." << childTag << " with value " << value << std::endl;
1450  }
1451 
1452  // finally, update the children
1453  if (not process.empty())
1454  childCat->update_value(process, childTag, value);
1455  }
1456  }
1457 }
1458 
1459 void category::update_value(row *row, uint16_t column, std::string_view value, bool updateLinked, bool validate)
1460 {
1461  // make sure we have an index, if possible
1462  if (m_index == nullptr and m_cat_validator != nullptr)
1463  m_index = new category_index(this);
1464 
1465  auto &col = m_columns[column];
1466 
1467  std::string_view oldValue;
1468 
1469  auto ival = row->get(column);
1470  if (ival != nullptr)
1471  oldValue = ival->text();
1472 
1473  if (value == oldValue) // no need to update
1474  return;
1475 
1476  std::string oldStrValue{ oldValue };
1477 
1478  // check the value
1479  if (col.m_validator and validate)
1480  col.m_validator->operator()(value);
1481 
1482  // If the field is part of the Key for this category, remove it from the index
1483  // before updating
1484 
1485  bool reinsert = false;
1486  if (updateLinked and // an update of an Item's value
1487  m_index != nullptr and key_field_indices().count(column))
1488  {
1489  reinsert = m_index->find(row);
1490  if (reinsert)
1491  m_index->erase(row);
1492  }
1493 
1494  // first remove old value with cix
1495  if (ival != nullptr)
1496  row->remove(column);
1497 
1498  if (not value.empty())
1499  row->append(column, { value });
1500 
1501  if (reinsert)
1502  m_index->insert(row);
1503 
1504  // see if we need to update any child categories that depend on this value
1505  auto iv = col.m_validator;
1506  if (updateLinked and iv != nullptr /*and m_cascade*/)
1507  {
1508  row_handle rh(*this, *row);
1509 
1510  for (auto &&[childCat, linked] : m_child_links)
1511  {
1512  if (std::find(linked->m_parent_keys.begin(), linked->m_parent_keys.end(), iv->m_tag) == linked->m_parent_keys.end())
1513  continue;
1514 
1515  condition cond;
1516  std::string childTag;
1517 
1518  for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
1519  {
1520  std::string pk = linked->m_parent_keys[ix];
1521  std::string ck = linked->m_child_keys[ix];
1522 
1523  // TODO: add code to *NOT* test mandatory fields for Empty
1524 
1525  if (pk == iv->m_tag)
1526  {
1527  childTag = ck;
1528  cond = std::move(cond) and key(ck) == oldStrValue;
1529  }
1530  else
1531  {
1532  std::string_view pk_value = rh[pk].text();
1533  if (pk_value.empty())
1534  cond = std::move(cond) and key(ck) == null;
1535  else
1536  cond = std::move(cond) and ((key(ck) == pk_value) or key(ck) == null);
1537  }
1538  }
1539 
1540  auto rows = childCat->find(std::move(cond));
1541  if (rows.empty())
1542  continue;
1543 
1544  // if (cif::VERBOSE > 2)
1545  // {
1546  // std::cerr << "Parent: " << linked->mParentcategory << " Child: " << linked->m_child_category << std::endl
1547  // << cond << std::endl;
1548  // }
1549 
1550  // Now, suppose there are already rows in child that conform to the new value,
1551  // we then skip this rename
1552 
1553  condition cond_n;
1554 
1555  for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
1556  {
1557  std::string pk = linked->m_parent_keys[ix];
1558  std::string ck = linked->m_child_keys[ix];
1559 
1560  if (pk == iv->m_tag)
1561  cond_n = std::move(cond_n) and key(ck) == value;
1562  else
1563  {
1564  std::string_view pk_value = rh[pk].text();
1565  if (pk_value.empty())
1566  cond_n = std::move(cond_n) and key(ck) == null;
1567  else
1568  cond_n = std::move(cond_n) and key(ck) == pk_value;
1569  }
1570  }
1571 
1572  auto rows_n = childCat->find(std::move(cond_n));
1573  if (not rows_n.empty())
1574  {
1575  if (cif::VERBOSE > 0)
1576  std::cerr << "Will not rename in child category since there are already rows that link to the parent" << std::endl;
1577 
1578  continue;
1579  }
1580 
1581  for (auto cr : rows)
1582  cr.assign(childTag, value, false);
1583  }
1584  }
1585 }
1586 
1587 row *category::clone_row(const row &r)
1588 {
1589  row *result = create_row();
1590 
1591  try
1592  {
1593  for (uint16_t ix = 0; ix < r.size(); ++ix)
1594  {
1595  auto &i = r[ix];
1596  if (not i)
1597  continue;
1598 
1599  result->append( ix, { i.text() });
1600  }
1601  }
1602  catch (...)
1603  {
1604  delete_row(result);
1605  throw;
1606  }
1607 
1608  return result;
1609 }
1610 
1611 void category::delete_row(row *r)
1612 {
1613  if (r != nullptr)
1614  {
1615  row_allocator_type ra(get_allocator());
1616  row_allocator_traits::destroy(ra, r);
1617  row_allocator_traits::deallocate(ra, r, 1);
1618  }
1619 }
1620 
1621 row_handle category::create_copy(row_handle r)
1622 {
1623  // copy the values
1624  std::vector<item> items;
1625 
1626  for (uint16_t ix = 0; ix < r.m_row->size(); ++ix)
1627  {
1628  auto i = r.m_row->get(ix);
1629  if (i != nullptr)
1630  items.emplace_back(m_columns[ix].m_name, i->text());
1631  }
1632 
1633  if (m_cat_validator and m_cat_validator->m_keys.size() == 1)
1634  {
1635  auto key = m_cat_validator->m_keys.front();
1636  auto kv = m_cat_validator->get_validator_for_item(key);
1637 
1638  for (auto &item : items)
1639  {
1640  if (item.name() != key)
1641  continue;
1642 
1643  if (kv->m_type->m_primitive_type == DDL_PrimitiveType::Numb)
1644  item.value(get_unique_id(""));
1645  else
1646  item.value(get_unique_id(m_name + "_id_"));
1647  break;
1648  }
1649  }
1650 
1651  return emplace(items.begin(), items.end());
1652 }
1653 
1654 // proxy methods for every insertion
1655 category::iterator category::insert_impl(const_iterator pos, row *n)
1656 {
1657  if (m_index == nullptr and m_cat_validator != nullptr)
1658  m_index = new category_index(this);
1659 
1660  assert(n != nullptr);
1661  assert(n->m_next == nullptr);
1662 
1663  if (n == nullptr)
1664  throw std::runtime_error("Invalid pointer passed to insert");
1665 
1666 // #ifndef NDEBUG
1667 // if (m_validator)
1668 // is_valid();
1669 // #endif
1670 
1671  try
1672  {
1673  // First, make sure all mandatory fields are supplied
1674  if (m_cat_validator != nullptr)
1675  {
1676  for (uint16_t ix = 0; ix < static_cast<uint16_t>(m_columns.size()); ++ix)
1677  {
1678  const auto &[column, iv] = m_columns[ix];
1679 
1680  if (iv == nullptr)
1681  continue;
1682 
1683  bool seen = false;
1684 
1685  auto i = n->get(ix);
1686  if (i != nullptr)
1687  {
1688  iv->operator()(i->text());
1689  seen = true;
1690  }
1691 
1692  if (not seen and iv->m_mandatory)
1693  throw std::runtime_error("missing mandatory field " + column + " for category " + m_name);
1694  }
1695  }
1696 
1697  if (m_index != nullptr)
1698  m_index->insert(n);
1699 
1700  // insert at end, most often this is the case
1701  if (pos.m_current == nullptr)
1702  {
1703  if (m_head == nullptr)
1704  m_tail = m_head = n;
1705  else
1706  m_tail = m_tail->m_next = n;
1707  }
1708  else
1709  {
1710  assert(m_head != nullptr);
1711 
1712  if (pos.m_current == m_head)
1713  m_head = n->m_next = m_head;
1714  else
1715  n = n->m_next = m_head->m_next;
1716  }
1717 
1718  return iterator(*this, n);
1719  }
1720  catch (const std::exception &e)
1721  {
1722  delete_row(n);
1723  throw;
1724  }
1725 
1726 // #ifndef NDEBUG
1727 // if (m_validator)
1728 // is_valid();
1729 // #endif
1730 }
1731 
1732 void category::swap_item(uint16_t column_ix, row_handle &a, row_handle &b)
1733 {
1734  assert(this == a.m_category);
1735  assert(this == b.m_category);
1736 
1737  auto &ra = *a.m_row;
1738  auto &rb = *b.m_row;
1739 
1740  std::swap(ra.at(column_ix), rb.at(column_ix));
1741 }
1742 
1743 void category::sort(std::function<int(row_handle,row_handle)> f)
1744 {
1745  if (m_head == nullptr)
1746  return;
1747 
1748  std::vector<row_handle> rows;
1749  for (auto itemRow = m_head; itemRow != nullptr; itemRow = itemRow->m_next)
1750  rows.emplace_back(*this, *itemRow);
1751 
1752  std::stable_sort(rows.begin(), rows.end(),
1753  [&f](row_handle ia, row_handle ib)
1754  {
1755  return f(ia, ib) < 0;
1756  });
1757 
1758  m_head = rows.front().get_row();
1759  m_tail = rows.back().get_row();
1760 
1761  auto r = m_head;
1762  for (size_t i = 1; i < rows.size(); ++i)
1763  r = r->m_next = rows[i].get_row();
1764  r->m_next = nullptr;
1765 
1766  assert(r == m_tail);
1767  assert(size() == rows.size());
1768 }
1769 
1770 void category::reorder_by_index()
1771 {
1772  if (m_index)
1773  std::tie(m_head, m_tail) = m_index->reorder();
1774 }
1775 
1776 namespace detail
1777 {
1778 
1779  size_t write_value(std::ostream &os, std::string_view value, size_t offset, size_t width)
1780  {
1781  if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
1782  {
1783  if (offset > 0)
1784  os << '\n';
1785  os << ';';
1786 
1787  char pc = 0;
1788  for (auto ch : value)
1789  {
1790  if (pc == '\n' and ch == ';')
1791  os << '\\';
1792  os << ch;
1793  pc = ch;
1794  }
1795 
1796  if (value.back() != '\n')
1797  os << '\n';
1798  os << ';' << '\n';
1799  offset = 0;
1800  }
1801  else if (sac_parser::is_unquoted_string(value))
1802  {
1803  os << value;
1804 
1805  if (value.length() < width)
1806  {
1807  os << std::string(width - value.length(), ' ');
1808  offset += width;
1809  }
1810  else
1811  {
1812  os << ' ';
1813  offset += value.length() + 1;
1814  }
1815  }
1816  else
1817  {
1818  bool done = false;
1819  for (char q : { '\'', '"' })
1820  {
1821  auto p = value.find(q); // see if we can use the quote character
1822  while (p != std::string::npos and sac_parser::is_non_blank(value[p + 1]) and value[p + 1] != q)
1823  p = value.find(q, p + 1);
1824 
1825  if (p != std::string::npos)
1826  continue;
1827 
1828  os << q << value << q;
1829 
1830  if (value.length() + 2 < width)
1831  {
1832  os << std::string(width - value.length() - 2, ' ');
1833  offset += width;
1834  }
1835  else
1836  {
1837  os << ' ';
1838  offset += value.length() + 1;
1839  }
1840 
1841  done = true;
1842  break;
1843  }
1844 
1845  if (not done)
1846  {
1847  if (offset > 0)
1848  os << '\n';
1849  os << ';' << value << '\n'
1850  << ';' << '\n';
1851  offset = 0;
1852  }
1853  }
1854 
1855  return offset;
1856  }
1857 
1858 } // namespace detail
1859 
1860 std::vector<std::string> category::get_tag_order() const
1861 {
1862  std::vector<std::string> result;
1863  for (auto &c : m_columns)
1864  result.push_back("_" + m_name + "." + c.m_name);
1865  return result;
1866 }
1867 
1868 void category::write(std::ostream &os) const
1869 {
1870  std::vector<uint16_t> order(m_columns.size());
1871  iota(order.begin(), order.end(), static_cast<uint16_t>(0));
1872  write(os, order, false);
1873 }
1874 
1875 void category::write(std::ostream &os, const std::vector<std::string> &columns, bool addMissingColumns)
1876 {
1877  // make sure all columns are present
1878  for (auto &c : columns)
1879  add_column(c);
1880 
1881  std::vector<uint16_t> order;
1882  order.reserve(m_columns.size());
1883 
1884  for (auto &c : columns)
1885  order.push_back(get_column_ix(c));
1886 
1887  if (addMissingColumns)
1888  {
1889  for (uint16_t i = 0; i < m_columns.size(); ++i)
1890  {
1891  if (std::find(order.begin(), order.end(), i) == order.end())
1892  order.push_back(i);
1893  }
1894  }
1895 
1896  write(os, order, true);
1897 }
1898 
1899 void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const
1900 {
1901  if (empty())
1902  return;
1903 
1904  // If the first Row has a next, we need a loop_
1905  bool needLoop = (m_head->m_next != nullptr);
1906 
1907  if (needLoop)
1908  {
1909  os << "loop_" << '\n';
1910 
1911  std::vector<size_t> columnWidths(m_columns.size());
1912 
1913  for (auto cix : order)
1914  {
1915  auto &col = m_columns[cix];
1916  os << '_';
1917  if (not m_name.empty())
1918  os << m_name << '.';
1919  os << col.m_name << ' ' << '\n';
1920  columnWidths[cix] = 2;
1921  }
1922 
1923  for (auto r = m_head; r != nullptr; r = r->m_next)
1924  {
1925  for (uint16_t ix = 0; ix < r->size(); ++ix)
1926  {
1927  auto v = r->get(ix);
1928  if (v == nullptr)
1929  continue;
1930 
1931  if (v->text().find('\n') == std::string_view::npos)
1932  {
1933  size_t l = v->text().length();
1934 
1935  if (not sac_parser::is_unquoted_string(v->text()))
1936  l += 2;
1937 
1938  if (l > 132)
1939  continue;
1940 
1941  if (columnWidths[ix] < l + 1)
1942  columnWidths[ix] = l + 1;
1943  }
1944  }
1945  }
1946 
1947  for (auto r = m_head; r != nullptr; r = r->m_next) // loop over rows
1948  {
1949  size_t offset = 0;
1950 
1951  for (uint16_t cix : order)
1952  {
1953  size_t w = columnWidths[cix];
1954 
1955  std::string_view s;
1956  auto iv = r->get(cix);
1957  if (iv != nullptr)
1958  s = iv->text();
1959 
1960  if (s.empty())
1961  s = "?";
1962 
1963  size_t l = s.length();
1964  if (not sac_parser::is_unquoted_string(s))
1965  l += 2;
1966  if (l < w)
1967  l = w;
1968 
1969  if (offset + l > 132 and offset > 0)
1970  {
1971  os << '\n';
1972  offset = 0;
1973  }
1974 
1975  offset = detail::write_value(os, s, offset, w);
1976 
1977  if (offset > 132)
1978  {
1979  os << '\n';
1980  offset = 0;
1981  }
1982  }
1983 
1984  if (offset > 0)
1985  os << '\n';
1986  }
1987  }
1988  else
1989  {
1990  // first find the indent level
1991  size_t l = 0;
1992 
1993  for (auto &col : m_columns)
1994  {
1995  std::string tag = '_' + m_name + '.' + col.m_name;
1996 
1997  if (l < tag.length())
1998  l = tag.length();
1999  }
2000 
2001  l += 3;
2002 
2003  for (uint16_t cix : order)
2004  {
2005  auto &col = m_columns[cix];
2006 
2007  os << '_';
2008  if (not m_name.empty())
2009  os << m_name << '.';
2010  os << col.m_name << std::string(l - col.m_name.length() - m_name.length() - 2, ' ');
2011 
2012  std::string_view s;
2013  auto iv = m_head->get(cix);
2014  if (iv != nullptr)
2015  s = iv->text();
2016 
2017  if (s.empty())
2018  s = "?";
2019 
2020  size_t offset = l;
2021  if (s.length() + l >= kMaxLineLength)
2022  {
2023  os << '\n';
2024  offset = 0;
2025  }
2026 
2027  if (detail::write_value(os, s, offset, 1) != 0)
2028  os << '\n';
2029  }
2030  }
2031 
2032  os << "# " << '\n';
2033 }
2034 
2035 bool category::operator==(const category &rhs) const
2036 {
2037  auto &a = *this;
2038  auto &b = rhs;
2039 
2040  using namespace std::placeholders;
2041 
2042 // set<std::string> tagsA(a.fields()), tagsB(b.fields());
2043 //
2044 // if (tagsA != tagsB)
2045 // std::cout << "Unequal number of fields" << std::endl;
2046 
2047  const category_validator *catValidator = nullptr;
2048 
2049  auto validator = a.get_validator();
2050  if (validator != nullptr)
2051  catValidator = validator->get_validator_for_category(a.name());
2052 
2053  typedef std::function<int(std::string_view,std::string_view)> compType;
2054  std::vector<std::tuple<std::string,compType>> tags;
2055  std::vector<std::string> keys;
2056  std::vector<size_t> keyIx;
2057 
2058  if (catValidator == nullptr)
2059  {
2060  for (auto& tag: a.get_columns())
2061  {
2062  tags.push_back(std::make_tuple(tag, [](std::string_view va, std::string_view vb) { return va.compare(vb); }));
2063  keyIx.push_back(keys.size());
2064  keys.push_back(tag);
2065  }
2066  }
2067  else
2068  {
2069  keys = catValidator->m_keys;
2070 
2071  for (auto& tag: a.key_fields())
2072  {
2073  auto iv = catValidator->get_validator_for_item(tag);
2074  if (iv == nullptr)
2075  throw std::runtime_error("missing item validator");
2076  auto tv = iv->m_type;
2077  if (tv == nullptr)
2078  throw std::runtime_error("missing type validator");
2079  tags.push_back(std::make_tuple(tag, std::bind(&cif::type_validator::compare, tv, std::placeholders::_1, std::placeholders::_2)));
2080 
2081  auto pred = [tag](const std::string& s) -> bool { return cif::iequals(tag, s) == 0; };
2082  if (find_if(keys.begin(), keys.end(), pred) == keys.end())
2083  keyIx.push_back(tags.size() - 1);
2084  }
2085  }
2086 
2087  // a.reorderByIndex();
2088  // b.reorderByIndex();
2089 
2090  auto rowEqual = [&](const row_handle& a, const row_handle& b)
2091  {
2092  int d = 0;
2093 
2094  for (auto kix: keyIx)
2095  {
2096  std::string tag;
2097  compType compare;
2098 
2099  std::tie(tag, compare) = tags[kix];
2100 
2101  d = compare(a[tag].text(), b[tag].text());
2102 
2103  if (d != 0)
2104  break;
2105  }
2106 
2107  return d == 0;
2108  };
2109 
2110  auto ai = a.begin(), bi = b.begin();
2111  while (ai != a.end() or bi != b.end())
2112  {
2113  if (ai == a.end() or bi == b.end())
2114  return false;
2115 
2116  auto ra = *ai, rb = *bi;
2117 
2118  if (not rowEqual(ra, rb))
2119  return false;
2120 
2121  std::vector<std::string> missingA, missingB, different;
2122 
2123  for (auto& tt: tags)
2124  {
2125  std::string tag;
2126  compType compare;
2127 
2128  std::tie(tag, compare) = tt;
2129 
2130  // make it an option to compare unapplicable to empty or something
2131 
2132  auto ta = ra[tag].text(); if (ta == "." or ta == "?") ta = "";
2133  auto tb = rb[tag].text(); if (tb == "." or tb == "?") tb = "";
2134 
2135  if (compare(ta, tb) != 0)
2136  return false;
2137  }
2138 
2139  ++ai;
2140  ++bi;
2141  }
2142 
2143  return true;
2144 }
2145 
2146 } // namespace cif
size_t size() const
Definition: category.cpp:562
void insert(row *r)
Definition: category.cpp:393
row * find_by_value(row_initializer k) const
Definition: category.cpp:362
void write(std::ostream &os, const datablock &db)
Definition: cif2pdb.cpp:3747
doublereal * c
int operator()(const row_initializer &a, const row *b) const
Definition: category.cpp:100
bool operator==(faketype, faketype)
Definition: gtest.h:1366
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
void compare(Image< double > &op1, const Image< double > &op2)
doublereal * w
uint16_t get_column_ix(const category &cat, std::string_view col)
Definition: condition.cpp:39
size_t write_value(std::ostream &os, std::string_view value, size_t offset, size_t width)
Definition: category.cpp:1779
std::tuple< row *, row * > reorder()
Definition: category.cpp:165
bool iequals(std::string_view a, std::string_view b)
Definition: text.cpp:59
doublereal * x
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
save_value(T &v, const T nv={})
Definition: category.cpp:1200
doublereal * d
void erase(row *r)
Definition: category.cpp:435
row * find(row *k) const
Definition: category.cpp:345
doublereal * b
double * f
category_index(category *cat)
Definition: category.cpp:142
row_comparator(category &cat)
Definition: category.cpp:49
float rh
int VERBOSE
Definition: utilities.cpp:58
int operator()(const row *a, const row *b) const
Definition: category.cpp:72
void sort(struct DCEL_T *dcel)
Definition: sorting.cpp:18
#define pi
const uint32_t kMaxLineLength
Definition: category.cpp:42
int * n
doublereal * a
check(nparam, nf, nfsr, &Linfty, nineq, nineqn, neq, neqn, ncsrl, ncsrn, mode, &modem, eps, bgbnd, param)