Xmipp  v3.23.11-Nereus
dictionary_parser.cpp
Go to the documentation of this file.
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "cif++/condition.hpp"
28 #include "cif++/dictionary_parser.hpp"
29 #include "cif++/file.hpp"
30 #include "cif++/parser.hpp"
31 
32 namespace cif
33 {
34 
35 using namespace literals;
36 
37 class dictionary_parser : public parser
38 {
39  public:
40  dictionary_parser(validator &validator, std::istream &is, file &f)
41  : parser(is, f)
42  , m_validator(validator)
43  {
44  }
45 
47  {
48  std::unique_ptr<datablock> dict;
49  auto savedDatablock = m_datablock;
50 
51  try
52  {
53  while (m_lookahead != CIFToken::Eof)
54  {
55  switch (m_lookahead)
56  {
57  case CIFToken::GLOBAL:
58  parse_global();
59  break;
60 
61  default:
62  {
63  dict.reset(new datablock(m_token_value)); // dummy datablock, for constructing the validator only
64  m_datablock = dict.get();
65 
66  match(CIFToken::DATA);
67  parse_datablock();
68  break;
69  }
70  }
71  }
72  }
73  catch (const std::exception &ex)
74  {
75  error(ex.what());
76  }
77 
78  // store all validators
79  for (auto &ic : mCategoryValidators)
80  m_validator.add_category_validator(std::move(ic));
81  mCategoryValidators.clear();
82 
83  for (auto &iv : mItemValidators)
84  {
85  auto cv = m_validator.get_validator_for_category(iv.first);
86  if (cv == nullptr)
87  error("Undefined category '" + iv.first);
88 
89  for (auto &v : iv.second)
90  const_cast<category_validator *>(cv)->addItemValidator(std::move(v));
91  }
92 
93  // check all item validators for having a typeValidator
94 
95  if (dict)
96  link_items();
97 
98  // store meta information
99  datablock::iterator info;
100  bool is_new;
101  std::tie(info, is_new) = m_datablock->emplace("dictionary");
102  if (not is_new and not info->empty())
103  {
104  auto r = info->front();
105  m_validator.set_name(r["title"].as<std::string>());
106  m_validator.version(r["version"].as<std::string>());
107  }
108 
109  m_datablock = savedDatablock;
110 
111  mItemValidators.clear();
112  }
113 
114  private:
115  void parse_save_frame() override
116  {
117  if (not m_collected_item_types)
118  m_collected_item_types = collect_item_types();
119 
120  std::string saveFrameName = m_token_value;
121 
122  if (saveFrameName.empty())
123  error("Invalid save frame, should contain more than just 'save_' here");
124 
125  bool isCategorySaveFrame = m_token_value[0] != '_';
126 
127  datablock dict(m_token_value);
128  datablock::iterator cat = dict.end();
129 
130  match(CIFToken::SAVE);
131  while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
132  {
133  if (m_lookahead == CIFToken::LOOP)
134  {
135  cat = dict.end(); // should start a new category
136 
137  match(CIFToken::LOOP);
138 
139  std::vector<std::string> tags;
140  while (m_lookahead == CIFToken::Tag)
141  {
142  std::string catName, item_name;
143  std::tie(catName, item_name) = split_tag_name(m_token_value);
144 
145  if (cat == dict.end())
146  std::tie(cat, std::ignore) = dict.emplace(catName);
147  else if (not iequals(cat->name(), catName))
148  error("inconsistent categories in loop_");
149 
150  tags.push_back(item_name);
151  match(CIFToken::Tag);
152  }
153 
154  while (m_lookahead == CIFToken::Value)
155  {
156  cat->emplace({});
157  auto row = cat->back();
158 
159  for (auto tag : tags)
160  {
161  row[tag] = m_token_value;
162  match(CIFToken::Value);
163  }
164  }
165 
166  cat = dict.end();
167  }
168  else
169  {
170  std::string catName, item_name;
171  std::tie(catName, item_name) = split_tag_name(m_token_value);
172 
173  if (cat == dict.end() or not iequals(cat->name(), catName))
174  std::tie(cat, std::ignore) = dict.emplace(catName);
175 
176  match(CIFToken::Tag);
177 
178  if (cat->empty())
179  cat->emplace({});
180  cat->back()[item_name] = m_token_value;
181 
182  match(CIFToken::Value);
183  }
184  }
185 
186  match(CIFToken::SAVE);
187 
188  if (isCategorySaveFrame)
189  {
190  std::string category = dict["category"].front().get<std::string>("id");
191 
192  std::vector<std::string> keys;
193  for (auto k : dict["category_key"])
194  keys.push_back(std::get<1>(split_tag_name(k["name"].as<std::string>())));
195 
196  iset groups;
197  for (auto g : dict["category_group"])
198  groups.insert(g["id"].as<std::string>());
199 
200  mCategoryValidators.push_back(category_validator{ category, keys, groups });
201  }
202  else
203  {
204  // if the type code is missing, this must be a pointer, just skip it
205  std::string typeCode = dict["item_type"].front().get<std::string>("code");
206 
207  const type_validator *tv = nullptr;
208  if (not(typeCode.empty() or typeCode == "?"))
209  tv = m_validator.get_validator_for_type(typeCode);
210 
211  iset ess;
212  for (auto e : dict["item_enumeration"])
213  ess.insert(e["value"].as<std::string>());
214 
215  std::string defaultValue = dict["item_default"].front().get<std::string>("value");
216  bool defaultIsNull = false;
217  if (defaultValue.empty())
218  {
219  // TODO: Is this correct???
220  for (auto r : dict["_item_default"])
221  {
222  defaultIsNull = r["value"].is_null();
223  break;
224  }
225  }
226 
227  // collect the dict from our dataBlock and construct validators
228  for (auto i : dict["item"])
229  {
230  std::string tagName, category, mandatory;
231  cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
232 
233  std::string cat_name, item_name;
234  std::tie(cat_name, item_name) = split_tag_name(tagName);
235 
236  if (cat_name.empty() or item_name.empty())
237  error("Invalid tag name in _item.name " + tagName);
238 
239  if (not iequals(category, cat_name) and not(category.empty() or category == "?"))
240  error("specified category id does match the implicit category name for tag '" + tagName + '\'');
241  else
242  category = cat_name;
243 
244  auto &ivs = mItemValidators[category];
245 
246  auto vi = find(ivs.begin(), ivs.end(), item_validator{ item_name });
247  if (vi == ivs.end())
248  ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull });
249  else
250  {
251  // need to update the itemValidator?
252  if (vi->m_mandatory != (iequals(mandatory, "yes")))
253  {
254  if (VERBOSE > 2)
255  {
256  std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
257 
258  if (iequals(tagName, saveFrameName))
259  std::cerr << "choosing " << mandatory << std::endl;
260  else
261  std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << std::endl;
262  }
263 
264  if (iequals(tagName, saveFrameName))
265  vi->m_mandatory = (iequals(mandatory, "yes"));
266  }
267 
268  if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
269  {
270  if (VERBOSE > 1)
271  std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
272  }
273 
274  // vi->mMandatory = (iequals(mandatory, "yes"));
275  if (vi->m_type == nullptr)
276  vi->m_type = tv;
277 
278  vi->m_enums.insert(ess.begin(), ess.end());
279 
280  // anything else yet?
281  // ...
282  }
283  }
284 
285  // collect the dict from our dataBlock and construct validators
286  for (auto i : dict["item_linked"])
287  {
288  mLinkedItems.emplace(i.get<std::string,std::string>("child_name", "parent_name"));
289  }
290  }
291  }
292 
293  void link_items()
294  {
295  if (not m_datablock)
296  error("no datablock");
297 
298  auto &dict = *m_datablock;
299 
300  // links are identified by a parent category, a child category and a group ID
301 
302  using key_type = std::tuple<std::string, std::string, int>;
303 
304  std::map<key_type, size_t> linkIndex;
305 
306  // Each link group consists of a set of keys
307  std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;
308 
309  auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
310  {
311  auto &&[pkeys, ckeys] = linkKeys.at(ix);
312 
313  bool found = false;
314  for (size_t i = 0; i < pkeys.size(); ++i)
315  {
316  if (pkeys[i] == pk and ckeys[i] == ck)
317  {
318  found = true;
319  break;
320  }
321  }
322 
323  if (not found)
324  {
325  pkeys.push_back(pk);
326  ckeys.push_back(ck);
327  }
328  };
329 
330  auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
331 
332  for (auto gl : linkedGroupList)
333  {
334  std::string child, parent;
335  int link_group_id;
336  cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
337 
338  auto civ = m_validator.get_validator_for_item(child);
339  if (civ == nullptr)
340  error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
341 
342  auto piv = m_validator.get_validator_for_item(parent);
343  if (piv == nullptr)
344  error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
345 
346  key_type key{ piv->m_category->m_name, civ->m_category->m_name, link_group_id };
347  if (not linkIndex.count(key))
348  {
349  linkIndex[key] = linkKeys.size();
350  linkKeys.push_back({});
351  }
352 
353  size_t ix = linkIndex.at(key);
354  addLink(ix, piv->m_tag, civ->m_tag);
355  }
356 
357  // Only process inline linked items if the linked group list is absent
358  if (linkedGroupList.empty())
359  {
360  // for links recorded in categories but not in pdbx_item_linked_group_list
361  for (auto li : mLinkedItems)
362  {
363  std::string child, parent;
364  std::tie(child, parent) = li;
365 
366  auto civ = m_validator.get_validator_for_item(child);
367  if (civ == nullptr)
368  error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
369 
370  auto piv = m_validator.get_validator_for_item(parent);
371  if (piv == nullptr)
372  error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
373 
374  key_type key{ piv->m_category->m_name, civ->m_category->m_name, 0 };
375  if (not linkIndex.count(key))
376  {
377  linkIndex[key] = linkKeys.size();
378  linkKeys.push_back({});
379  }
380 
381  size_t ix = linkIndex.at(key);
382  addLink(ix, piv->m_tag, civ->m_tag);
383  }
384  }
385 
386  auto &linkedGroup = dict["pdbx_item_linked_group"];
387 
388  // now store the links in the validator
389  for (auto &kv : linkIndex)
390  {
391  link_validator link = {};
392  std::tie(link.m_parent_category, link.m_child_category, link.m_link_group_id) = kv.first;
393 
394  std::tie(link.m_parent_keys, link.m_child_keys) = linkKeys[kv.second];
395 
396  // look up the label
397  for (auto r : linkedGroup.find("category_id"_key == link.m_child_category and "link_group_id"_key == link.m_link_group_id))
398  {
399  link.m_link_group_label = r["label"].as<std::string>();
400  break;
401  }
402 
403  m_validator.add_link_validator(std::move(link));
404  }
405 
406  // now make sure the itemType is specified for all itemValidators
407 
408  for (auto &cv : m_validator.m_category_validators)
409  {
410  for (auto &iv : cv.m_item_validators)
411  {
412  if (iv.m_type == nullptr and cif::VERBOSE >= 0)
413  std::cerr << "Missing item_type for " << iv.m_tag << std::endl;
414  }
415  }
416  }
417 
418  bool collect_item_types()
419  {
420  bool result = false;
421 
422  if (not m_datablock)
423  error("no datablock");
424 
425  auto &dict = *m_datablock;
426 
427  for (auto t : dict["item_type_list"])
428  {
429  std::string code, primitiveCode, construct;
430  cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");
431 
432  replace_all(construct, "\\n", "\n");
433  replace_all(construct, "\\t", "\t");
434  replace_all(construct, "\\\n", "");
435 
436  try
437  {
438  type_validator v = {
439  code, map_to_primitive_type(primitiveCode), construct
440  };
441 
442  m_validator.add_type_validator(std::move(v));
443  }
444  catch (const std::exception &)
445  {
446  std::throw_with_nested(parse_error(/*t.lineNr()*/ 0, "error in regular expression"));
447  }
448 
449  // Do not replace an already defined type validator, this won't work with pdbx_v40
450  // as it has a name that is too strict for its own names :-)
451  // if (mFileImpl.mTypeValidators.count(v))
452  // mFileImpl.mTypeValidators.erase(v);
453 
454  if (VERBOSE >= 5)
455  std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
456 
457  result = true;
458  }
459 
460  return result;
461  }
462 
463  validator &m_validator;
464  bool m_collected_item_types = false;
465 
466  std::vector<category_validator> mCategoryValidators;
467  std::map<std::string, std::vector<item_validator>> mItemValidators;
468  std::set<std::tuple<std::string, std::string>> mLinkedItems;
469 };
470 
471 // --------------------------------------------------------------------
472 
473 validator parse_dictionary(std::string_view name, std::istream &is)
474 {
475  validator result(name);
476 
477  file f;
478  dictionary_parser p(result, is, f);
479  p.load_dictionary();
480 
481  return result;
482 }
483 
484 } // namespace cif
void replace_all(std::string &s, std::string_view what, std::string_view with)
Definition: text.cpp:134
doublereal * g
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
validator parse_dictionary(std::string_view name, std::istream &is)
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
Definition: validate.cpp:72
bool iequals(std::string_view a, std::string_view b)
Definition: text.cpp:59
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
double * f
std::tuple< std::string, std::string > split_tag_name(std::string_view tag)
Definition: text.cpp:218
int VERBOSE
Definition: utilities.cpp:58
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
void error(char *s)
Definition: tools.cpp:107
dictionary_parser(validator &validator, std::istream &is, file &f)