Xmipp  v3.23.11-Nereus
validate.cpp
Go to the documentation of this file.
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "cif++/dictionary_parser.hpp"
28 #include "cif++/validate.hpp"
29 #include "cif++/utilities.hpp"
30 #include "cif++/gzio.hpp"
31 
32 #include <cassert>
33 #include <fstream>
34 #include <iostream>
35 
36 // The validator depends on regular expressions. Unfortunately,
37 // the implementation of std::regex in g++ is buggy and crashes
38 // on reading the pdbx dictionary. Therefore, in case g++ is used
39 // the code will use boost::regex instead.
40 
41 #if USE_BOOST_REGEX
42 #include <boost/regex.hpp>
43 using boost::regex;
44 #else
45 #include <regex>
46 using std::regex;
47 #endif
48 
49 namespace cif
50 {
51 
52 struct regex_impl : public regex
53 {
54  regex_impl(std::string_view rx)
55  : regex(rx.begin(), rx.end(), regex::extended | regex::optimize)
56  {
57  }
58 };
59 
60 validation_error::validation_error(const std::string &msg)
61  : m_msg(msg)
62 {
63 }
64 
65 validation_error::validation_error(const std::string &cat, const std::string &item, const std::string &msg)
66  : m_msg("When validating _" + cat + '.' + item + ": " + msg)
67 {
68 }
69 
70 // --------------------------------------------------------------------
71 
72 DDL_PrimitiveType map_to_primitive_type(std::string_view s)
73 {
74  DDL_PrimitiveType result;
75  if (iequals(s, "char"))
76  result = DDL_PrimitiveType::Char;
77  else if (iequals(s, "uchar"))
78  result = DDL_PrimitiveType::UChar;
79  else if (iequals(s, "numb"))
80  result = DDL_PrimitiveType::Numb;
81  else
82  throw validation_error("Not a known primitive type");
83  return result;
84 }
85 
86 // --------------------------------------------------------------------
87 
88 type_validator::type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx)
89  : m_name(name)
90  , m_primitive_type(type)
91  , m_rx(new regex_impl(rx.empty() ? ".+" : rx))
92 {
93 }
94 
95 type_validator::~type_validator()
96 {
97  delete m_rx;
98 }
99 
// Class template whose static from_chars() forwards to cif::from_chars, so a
// caller can select the conversion routine through a type.
// NOTE(review): the embedded listing numbering jumps from 100 to 102, so the
// line that names the enclosing struct is not present in this listing; restore
// it from the upstream file before compiling.
100 template <typename T>
102 {
// Parse the character range [a, b) into d using cif::from_chars and return its result.
103  static std::from_chars_result from_chars(const char *a, const char *b, T &d)
104  {
105  return cif::from_chars(a, b, d);
106  }
107 };
108 
// Class template whose static from_chars() forwards to std::from_chars, so a
// caller can select the conversion routine through a type.
// NOTE(review): the embedded listing numbering jumps from 109 to 111, so the
// line that names the enclosing struct is not present in this listing; restore
// it from the upstream file before compiling.
109 template <typename T>
111 {
// Parse the character range [a, b) into d using std::from_chars and return its result.
112  static std::from_chars_result from_chars(const char *a, const char *b, T &d)
113  {
114  return std::from_chars(a, b, d);
115  }
116 };
117 
118 int type_validator::compare(std::string_view a, std::string_view b) const
119 {
120  int result = 0;
121 
122  if (a.empty())
123  result = b.empty() ? 0 : -1;
124  else if (b.empty())
125  result = a.empty() ? 0 : +1;
126  else
127  {
128  switch (m_primitive_type)
129  {
130  case DDL_PrimitiveType::Numb:
131  {
132  double da, db;
133 
134  using namespace cif;
135  using namespace std;
136 
137  std::from_chars_result ra, rb;
138 
139  ra = selected_charconv<double>::from_chars(a.data(), a.data() + a.length(), da);
140  rb = selected_charconv<double>::from_chars(b.data(), b.data() + b.length(), db);
141 
142  if (ra.ec == std::errc() and rb.ec == std::errc())
143  {
144  auto d = da - db;
146  {
147  if (d > 0)
148  result = 1;
149  else if (d < 0)
150  result = -1;
151  }
152  }
153  else if (ra.ec == std::errc())
154  result = 1;
155  else
156  result = -1;
157  break;
158  }
159 
160  case DDL_PrimitiveType::UChar:
161  case DDL_PrimitiveType::Char:
162  {
163  // CIF is guaranteed to have ascii only, therefore this primitive code will do
164  // also, we're collapsing spaces
165 
166  auto ai = a.begin(), bi = b.begin();
167  for (;;)
168  {
169  if (ai == a.end())
170  {
171  if (bi != b.end())
172  result = -1;
173  break;
174  }
175  else if (bi == b.end())
176  {
177  result = 1;
178  break;
179  }
180 
181  char ca = *ai;
182  char cb = *bi;
183 
184  if (m_primitive_type == DDL_PrimitiveType::UChar)
185  {
186  ca = tolower(ca);
187  cb = tolower(cb);
188  }
189 
190  result = ca - cb;
191 
192  if (result != 0)
193  break;
194 
195  if (ca == ' ')
196  {
197  while (ai[1] == ' ')
198  ++ai;
199  while (bi[1] == ' ')
200  ++bi;
201  }
202 
203  ++ai;
204  ++bi;
205  }
206 
207  break;
208  }
209  }
210  }
211 
212  return result;
213 }
214 
215 // --------------------------------------------------------------------
216 
217 // void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
218 //{
222 //
223 // if (m_type == nullptr and parent != nullptr)
224 // m_type = parent->m_type;
225 //
226 // if (parent != nullptr)
227 // {
228 // mLinked.push_back({parent, parentItem, childItem});
229 //
230 // parent->mChildren.insert(this);
234 // }
235 //}
236 
237 void item_validator::operator()(std::string_view value) const
238 {
239  if (not value.empty() and value != "?" and value != ".")
240  {
241  if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
242  throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' does not match type expression for type " + m_type->m_name);
243 
244  if (not m_enums.empty())
245  {
246  if (m_enums.count(std::string{ value }) == 0)
247  throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' is not in the list of allowed values");
248  }
249  }
250 }
251 
252 // --------------------------------------------------------------------
253 
254 void category_validator::addItemValidator(item_validator &&v)
255 {
256  if (v.m_mandatory)
257  m_mandatory_fields.insert(v.m_tag);
258 
259  v.m_category = this;
260 
261  auto r = m_item_validators.insert(std::move(v));
262  if (not r.second and VERBOSE >= 4)
263  std::cout << "Could not add validator for item " << v.m_tag << " to category " << m_name << std::endl;
264 }
265 
266 const item_validator *category_validator::get_validator_for_item(std::string_view tag) const
267 {
268  const item_validator *result = nullptr;
269  auto i = m_item_validators.find(item_validator{ std::string(tag) });
270  if (i != m_item_validators.end())
271  result = &*i;
272  else if (VERBOSE > 4)
273  std::cout << "No validator for tag " << tag << std::endl;
274  return result;
275 }
276 
277 // --------------------------------------------------------------------
278 
279 void validator::add_type_validator(type_validator &&v)
280 {
281  auto r = m_type_validators.insert(std::move(v));
282  if (not r.second and VERBOSE > 4)
283  std::cout << "Could not add validator for type " << v.m_name << std::endl;
284 }
285 
286 const type_validator *validator::get_validator_for_type(std::string_view typeCode) const
287 {
288  const type_validator *result = nullptr;
289 
290  auto i = m_type_validators.find(type_validator{ std::string(typeCode), DDL_PrimitiveType::Char, {} });
291  if (i != m_type_validators.end())
292  result = &*i;
293  else if (VERBOSE > 4)
294  std::cout << "No validator for type " << typeCode << std::endl;
295  return result;
296 }
297 
298 void validator::add_category_validator(category_validator &&v)
299 {
300  auto r = m_category_validators.insert(std::move(v));
301  if (not r.second and VERBOSE > 4)
302  std::cout << "Could not add validator for category " << v.m_name << std::endl;
303 }
304 
305 const category_validator *validator::get_validator_for_category(std::string_view category) const
306 {
307  const category_validator *result = nullptr;
308  auto i = m_category_validators.find(category_validator{ std::string(category) });
309  if (i != m_category_validators.end())
310  result = &*i;
311  else if (VERBOSE > 4)
312  std::cout << "No validator for category " << category << std::endl;
313  return result;
314 }
315 
316 item_validator *validator::get_validator_for_item(std::string_view tag) const
317 {
318  item_validator *result = nullptr;
319 
320  std::string cat, item;
321  std::tie(cat, item) = split_tag_name(tag);
322 
323  auto *cv = get_validator_for_category(cat);
324  if (cv != nullptr)
325  result = const_cast<item_validator *>(cv->get_validator_for_item(item));
326 
327  if (result == nullptr and VERBOSE > 4)
328  std::cout << "No validator for item " << tag << std::endl;
329 
330  return result;
331 }
332 
333 void validator::add_link_validator(link_validator &&v)
334 {
335  assert(v.m_parent_keys.size() == v.m_child_keys.size());
336  if (v.m_parent_keys.size() != v.m_child_keys.size())
337  throw std::runtime_error("unequal number of keys for parent and child in link");
338 
339  auto pcv = get_validator_for_category(v.m_parent_category);
340  auto ccv = get_validator_for_category(v.m_child_category);
341 
342  if (pcv == nullptr)
343  throw std::runtime_error("unknown parent category " + v.m_parent_category);
344 
345  if (ccv == nullptr)
346  throw std::runtime_error("unknown child category " + v.m_child_category);
347 
348  for (size_t i = 0; i < v.m_parent_keys.size(); ++i)
349  {
350  auto piv = pcv->get_validator_for_item(v.m_parent_keys[i]);
351 
352  if (piv == nullptr)
353  throw std::runtime_error("unknown parent tag _" + v.m_parent_category + '.' + v.m_parent_keys[i]);
354 
355  auto civ = ccv->get_validator_for_item(v.m_child_keys[i]);
356  if (civ == nullptr)
357  throw std::runtime_error("unknown child tag _" + v.m_child_category + '.' + v.m_child_keys[i]);
358 
359  if (civ->m_type == nullptr and piv->m_type != nullptr)
360  const_cast<item_validator *>(civ)->m_type = piv->m_type;
361  }
362 
363  m_link_validators.emplace_back(std::move(v));
364 }
365 
366 std::vector<const link_validator *> validator::get_links_for_parent(std::string_view category) const
367 {
368  std::vector<const link_validator *> result;
369 
370  for (auto &l : m_link_validators)
371  {
372  if (l.m_parent_category == category)
373  result.push_back(&l);
374  }
375 
376  return result;
377 }
378 
379 std::vector<const link_validator *> validator::get_links_for_child(std::string_view category) const
380 {
381  std::vector<const link_validator *> result;
382 
383  for (auto &l : m_link_validators)
384  {
385  if (l.m_child_category == category)
386  result.push_back(&l);
387  }
388 
389  return result;
390 }
391 
392 void validator::report_error(const std::string &msg, bool fatal) const
393 {
394  if (m_strict or fatal)
395  throw validation_error(msg);
396  else if (VERBOSE > 0)
397  std::cerr << msg << std::endl;
398 }
399 
400 // --------------------------------------------------------------------
401 
402 const validator &validator_factory::operator[](std::string_view dictionary_name)
403 {
404  std::lock_guard lock(m_mutex);
405 
406  for (auto &validator : m_validators)
407  {
408  if (iequals(validator.name(), dictionary_name))
409  return validator;
410  }
411 
412  // not found, try to see if it helps if we tweak the name a little
413 
414  // too bad clang version 10 did not have a constructor for std::filesystem::path that accepts a std::string_view
415  std::filesystem::path dictionary(dictionary_name.data(), dictionary_name.data() + dictionary_name.length());
416 
417  if (dictionary.extension() != ".dic")
418  {
419  auto dict_name = dictionary.filename().string() + ".dic";
420 
421  for (auto &validator : m_validators)
422  {
423  if (iequals(validator.name(), dict_name))
424  return validator;
425  }
426  }
427 
428  // not found, add it
429 
430 
431  auto data = load_resource(dictionary_name);
432 
433  if (not data and dictionary.extension().string() != ".dic")
434  data = load_resource(dictionary.parent_path() / (dictionary.filename().string() + ".dic"));
435 
436  if (data)
437  construct_validator(dictionary_name, *data);
438  else
439  {
440  std::error_code ec;
441 
442  // might be a compressed dictionary on disk
443  std::filesystem::path p = dictionary;
444  if (p.extension() == ".dic")
445  p = p.parent_path() / (p.filename().string() + ".gz");
446  else
447  p = p.parent_path() / (p.filename().string() + ".dic.gz");
448 
449 #if defined(CACHE_DIR) or defined(DATA_DIR)
450  if (not std::filesystem::exists(p, ec) or ec)
451  {
452  for (const char *dir : {
453 #if defined(CACHE_DIR)
454  CACHE_DIR,
455 #endif
456 #if defined(DATA_DIR)
457  DATA_DIR
458 #endif
459  })
460  {
461  auto p2 = std::filesystem::path(dir) / p;
462  if (std::filesystem::exists(p2, ec) and not ec)
463  {
464  swap(p, p2);
465  break;
466  }
467  }
468  }
469 #endif
470 
471  if (std::filesystem::exists(p, ec) and not ec)
472  {
473  gzio::ifstream in(p);
474 
475  if (not in.is_open())
476  throw std::runtime_error("Could not open dictionary (" + p.string() + ")");
477 
478  construct_validator(dictionary_name, in);
479  }
480  else
481  throw std::runtime_error("Dictionary not found or defined (" + dictionary.string() + ")");
482  }
483 
484  return m_validators.back();
485 }
486 
487 void validator_factory::construct_validator(std::string_view name, std::istream &is)
488 {
489  m_validators.emplace_back(parse_dictionary(name, is));
490 }
491 
492 } // namespace cif
void compare(Image< double > &op1, const Image< double > &op2)
void abs(Image< double > &op)
validator parse_dictionary(std::string_view name, std::istream &is)
std::unique_ptr< std::istream > load_resource(std::filesystem::path name)
Definition: utilities.cpp:943
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
Definition: validate.cpp:72
bool iequals(std::string_view a, std::string_view b)
Definition: text.cpp:59
#define i
doublereal * d
regex_impl(std::string_view rx)
Definition: validate.cpp:54
doublereal * b
viol type
int in
std::tuple< std::string, std::string > split_tag_name(std::string_view tag)
Definition: text.cpp:218
int VERBOSE
Definition: utilities.cpp:58
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
Definition: validate.cpp:103
double epsilon
doublereal * a
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
Definition: validate.cpp:112