Xmipp  v3.23.11-Nereus
metadata_base.h
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: J.M. de la Rosa Trevin (jmdelarosa@cnb.csic.es)
4  * Jan Horacek (xhorace4@fi.muni.cz)
5  *
6  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21  * 02111-1307 USA
22  *
23  * All comments concerning this program package may be sent to the
24  * e-mail address 'xmipp@cnb.csic.es'
25  ***************************************************************************/
26 
32 #ifndef CORE_METADATA_H
33 #define CORE_METADATA_H
34 
35 #include <cstddef>
36 #include <map>
37 #include <cmath>
38 #include <memory>
39 #include <iostream>
40 
41 #include "xmipp_error.h"
42 #include "xmipp_filename.h"
43 #include "metadata_label.h"
44 #include "metadata_writemode.h"
45 #include "metadata_base_it.h"
46 #include "metadata_static.h"
47 #include "metadata_query.h"
48 #include "choose.h"
49 
55 #define BAD_OBJID 0
56 #define BAD_INDEX -1
57 
58 #define FILENAME_XMIPP_STAR "# XMIPP_STAR_1"
59 #define FILENAME_XMIPP_SQLITE "SQLite format 3"
60 #define DEFAULT_BLOCK_NAME "noname"
61 
62 // FIXME: deprecated
63 // The preferred way of iterating is shown on the right-hand side of these macros
64 #define FOR_ALL_OBJECTS_IN_METADATA(__md) for (size_t objId : __md.ids())
65 
66 // FIXME: deprecated
67 #define FOR_ALL_ROWS_IN_METADATA(__md) for (auto& row : __md)
68 
69 #define END_OF_LINE() ((char*) memchr (iter, '\n', end-iter))
70 
72 void getBlocksInMetaDataFile(const FileName &inFile, StringVector& blockList);
73 bool existsBlockInMetaDataFile(const FileName &inFile, const String& inBlock);
74 bool existsBlockInMetaDataFile(const FileName &inFileWithBlock);
75 
76 class MDValueGenerator;
77 
/** Pointer/length pair describing a region of a character buffer. */
typedef struct {
    char * begin;  // first character of the region
    size_t size;   // number of characters available starting at begin
}
mdBuffer;

// Helper macros operating on mdBuffer values.
// BUFFER_CREATE declares an empty buffer named b.
#define BUFFER_CREATE(b) \
    mdBuffer b; \
    b.begin = nullptr; \
    b.size = 0
// BUFFER_COPY declares b2 describing the same region as b1.
#define BUFFER_COPY(b1, b2) \
    mdBuffer b2; \
    b2.begin = b1.begin; \
    b2.size = b1.size
// BUFFER_MOVE advances the start by n characters and shrinks the size by the same amount.
#define BUFFER_MOVE(b, n) \
    b.begin += n; \
    b.size -= n
// BUFFER_FIND forwards to _memmem (declared elsewhere) and casts its result to char*.
#define BUFFER_FIND(b, str, n) (char*) _memmem(b.begin, b.size, str, n)
92 
/** Location of one data block inside a buffer. */
typedef struct {
    char * begin;     // position of _dataXXX in the buffer
    size_t nameSize;  // number of characters of the block name, counted after _data
    char * end;       // position just before the next _dataXXX, or the end of the buffer
    char * loop;      // position of _loop if present, NULL otherwise
}
mdBlock;
// BLOCK_CREATE declares a block named b with all pointers null and an empty name.
#define BLOCK_CREATE(b) \
    mdBlock b; \
    b.begin = b.end = b.loop = nullptr; \
    b.nameSize = 0
// BLOCK_INIT resets an existing block to the same empty state.
#define BLOCK_INIT(b) \
    b.begin = b.end = b.loop = nullptr; \
    b.nameSize = 0
// BLOCK_NAME copies nameSize characters starting at begin into the string s.
#define BLOCK_NAME(b, s) s.assign(b.begin, b.nameSize)
104 
105 class ObjectDoesNotExist: public std::logic_error {
106 public:
107  ObjectDoesNotExist(MDLabel label, size_t id) : std::logic_error("Label '" + MDL::label2Str(label) + " does not exist for id " + std::to_string(id)) {};
108  ObjectDoesNotExist(size_t id, const FileName &fn) : std::logic_error("Row with id " + std::to_string(id) + " does not exist" + (fn.isEmpty() ? "" : " in " + fn)) {};
109 };
110 
/** Exception carrying a caller-supplied message; derives from std::logic_error. */
class NotImplemented : public std::logic_error {
public:
    /** @param str text reported by what(). */
    NotImplemented(const std::string& str)
        : std::logic_error(str)
    {
    }
};
115 
116 class ColumnDoesNotExist : public std::logic_error {
117 public:
118  ColumnDoesNotExist(MDLabel label, const FileName &fn) : std::logic_error("Column '" + MDL::label2Str(label) + " does not exist" + (fn.isEmpty() ? "" : " in " + fn)) {};
119  ColumnDoesNotExist(const std::string &msg) : std::logic_error(msg) {};
120 };
121 
122 
176 class MetaData {
177 private:
178  bool _isColumnFormat; // Format for the file, column or row formatted
179 protected:
180  /* Allows a fast search for pairs where the value is
181  * a string, i.e. looking for filenames which is quite usual.
182  */
183  std::map<String, size_t> _fastStringSearch;
186 #define line_max 70
188 
189 
190  int _precision = 1000;
191 
196 
200  size_t _maxRows = 0, _parsedLines = 0;
201 
202  void copyInfo(const MetaData& md);
203  double precision() const;
204 
205  virtual void readStar(const FileName &filename, const std::vector<MDLabel> *desiredLabels,
206  const String &blockRegExp, bool decomposeStack);
207 
208  virtual void _readColumns(std::istream& is, std::vector<MDObject*> & columnValues,
209  const std::vector<MDLabel>* desiredLabels = nullptr);
210 
211  virtual void _readColumnsStar(mdBlock &block,
212  std::vector<MDObject*> & columnValues,
213  const std::vector<MDLabel>* desiredLabels,
214  bool addColumns = true,
215  size_t id = BAD_OBJID);
216 
217  /* Helper function to parse an MDObject and set its value.
218  * The parsing will be from an input stream(istream)
219  * and if parsing fails, an error will be raised
220  */
221  virtual void _parseObject(std::istream &is, MDObject &object, size_t id = BAD_OBJID);
222 
223  virtual void _parseObjects(std::istream &is, std::vector<MDObject*> & columnValues,
224  const std::vector<MDLabel> *desiredLabels, bool firstTime) = 0;
225 
226  /* This function will be used to parse the rows data
227  * having read the columns labels before and setting which are desired
228  * the useCommentAsImage is for compatibility with old DocFile format
229  * where the image were in comments
230  */
231  virtual void _readRows(std::istream& is, std::vector<MDObject*>& columnValues, bool useCommentAsImage);
232 
239  virtual void _readRowsStar(mdBlock &block, std::vector<MDObject*> & columnValues,
240  const std::vector<MDLabel> *desiredLabels);
241 
242 public:
247 
258  MetaData() = default;
259  virtual ~MetaData();
260 
263  virtual void clear();
272  virtual bool isColumnFormat() const { return _isColumnFormat; }
273 
277  virtual void setMaxRows(size_t maxRows=0) { _maxRows = maxRows; }
278 
283  virtual size_t getParsedLines() { return _parsedLines; }
284 
288  virtual void setPrecission(int _precision) { this->_precision = (int)pow (10,_precision); }
289 
293  virtual void setColumnFormat(bool column) { _isColumnFormat = column; }
294 
298  virtual void writeXML(const FileName fn, const FileName blockname, WriteModeMetaData mode) const = 0;
299 
303  virtual void writeText(const FileName fn, const std::vector<MDLabel>* desiredLabels) const = 0;
304 
305  virtual void writeStar(const FileName &outFile, const String & blockName, WriteModeMetaData mode) const;
306 
310  virtual String getComment() const { return this->_comment; }
311 
315  virtual void setComment(const String &newComment = "No comment") { this->_comment = newComment; }
316 
319  virtual FileName getFilename() const { return this->_inFile; }
320 
323  virtual void setFilename(const FileName &_filename) { this->_inFile = _filename; }
324 
327  virtual std::vector<MDLabel> getActiveLabels() const = 0;
328 
331  virtual int getMaxStringLength(const MDLabel thisLabel) const = 0;
332 
341  virtual bool setValueCol(const MDObject &mdValueIn) = 0;
342 
349  template<class T>
350  bool setValueCol(const MDLabel label, const T &valueIn) {
351  return setValueCol(MDObject(label, valueIn));
352  }
353 
366  template<class T>
367  bool setValue(const MDLabel label, const T &valueIn, size_t id) {
368  return setValue(MDObject(label, valueIn), id);
369  }
370 
371  virtual size_t addRow(const MDRow &row) = 0;
372 
374  if (&md == this) {
375  return *this;
376  }
377  this->clear();
378  this->copyInfo(md);
379  for (const auto& row : md)
380  this->addRow(row);
381  return *this;
382  };
383 
390  virtual bool setValue(const MDObject &mdValueIn, size_t id) = 0;
391  virtual bool getValue(MDObject &mdValueOut, size_t id) const = 0;
392  // FIXME: deprecated; this function should return MDObject, it is const-unsafe
393 
408  template<class T>
409  T getValue(const MDLabel label, size_t id) const {
410  MDObject mdValueOut(label);
411  if (!getValue(mdValueOut, id))
412  throw ObjectDoesNotExist(label, id);
413  return mdValueOut.getValue2(T());
414  }
415 
416  template<class T>
417  bool getValue(const MDLabel label, T &valueOut, size_t id) const {
418  MDObject mdValueOut(label);
419  if (!getValue(mdValueOut, id))
420  return false;
421  valueOut = mdValueOut.getValue2(T());
422  return true;
423  }
424 
425  template<class T>
426  const T& getValueOrAbort(const MDLabel label, size_t id) const {
427  return getValue<T>(label, id);
428  }
429 
430  template<class T>
431  T& getValueOrAbort(const MDLabel label, size_t id) {
432  return getValue<T>(label, id);
433  }
434 
435  template<class T>
436  void getValueOrAbort(const MDLabel label, T &valueOut, size_t id) const { // FIXME: deprecated
437  if (!getValue(label, valueOut,id))
438  REPORT_ERROR(ERR_ARG_MISSING,(String)"Cannot find label: " + MDL::label2Str(label));
439  }
440 
441  template <typename T>
442  const T getValueOrDefault(const MDLabel label, size_t id, const T &_default) const {
443  MDObject mdValueOut(label);
444  if (!getValue(mdValueOut, id))
445  return _default;
446  return mdValueOut.getValue2(T());
447  }
448 
449  template <typename T>
450  T getValueOrDefault(const MDLabel label, size_t id, T &_default) {
451  MDObject mdValueOut(label);
452  if (!getValue(mdValueOut, id))
453  return _default;
454  return mdValueOut.getValue2(T());
455  }
456 
457  template <typename T, typename T1>
458  void getValueOrDefault(const MDLabel label, T &valueOut, size_t id, const T1 &_default) const { // FIXME: deprecated
459  if (!getValue(label, valueOut,id))
460  valueOut = (T) _default;
461  }
462 
465  template<class T>
466  std::vector<T> getColumnValues(const MDLabel label) const {
467  std::vector<T> result;
468  MDObject mdValueOut(label);
469  std::vector<size_t> objectsId;
470  findObjects(objectsId);
471  size_t n = objectsId.size();
472  result.reserve(n);
473  for (size_t i = 0; i < n; ++i) {
474  if (!getValue(mdValueOut, objectsId[i]))
475  throw std::logic_error("Column does not exist!");
476  result.emplace_back(mdValueOut.getValue2(T()));
477  }
478  return result;
479  }
480 
481  // FIXME: deprecated
482  template<class T>
483  void getColumnValues(const MDLabel label, std::vector<T>& out) const {
484  out = this->getColumnValues<T>(label);
485  }
486 
487  // FIXME: add proper return value
488  virtual bool getRowValues(size_t id, std::vector<MDObject> &values) const = 0;
489 
492  // FIXME: add proper return value
493  virtual void getColumnValues(const MDLabel label, std::vector<MDObject> &valuesOut) const = 0;
494 
498  template<class T>
499  void setColumnValues(const MDLabel label, const std::vector<T> &valuesIn) {
500  if (valuesIn.size() != size())
501  REPORT_ERROR(ERR_MD_OBJECTNUMBER, "Input vector must be of the same size as the metadata");
502  size_t n = 0;
503  for (size_t objId : this->ids())
504  this->setValue(label, valuesIn[n++], objId);
505  }
506 
507  virtual void setColumnValues(const std::vector<MDObject> &valuesIn) = 0;
508 
509  virtual std::unique_ptr<MDRow> getRow(size_t id) = 0;
510  virtual std::unique_ptr<const MDRow> getRow(size_t id) const = 0;
511 
514  virtual bool setValueFromStr(const MDLabel label, const String &value, size_t id);
515 
518  virtual bool getStrFromValue(const MDLabel label, String &strOut, size_t id) const;
519 
522  virtual bool isEmpty() const { return size() == 0; }
523 
526  virtual size_t size() const = 0;
527 
530  virtual bool containsLabel(const MDLabel label) const = 0;
531 
537  virtual bool addLabel(const MDLabel label, int pos = -1) = 0;
538 
543  virtual bool removeLabel(const MDLabel label) = 0;
544 
548  void keepLabels(const std::vector<MDLabel> &labels);
549 
556  virtual size_t addObject() = 0;
557 
568  virtual void importObject(const MetaData &md, const size_t id, bool doClear=true) = 0;
569  virtual void importObjects(const MetaData &md, const std::vector<size_t> &objectsToAdd, bool doClear=true) = 0;
570  virtual void importObjects(const MetaData &md, const MDQuery &query, bool doClear=true) = 0;
571 
576  virtual bool removeObject(size_t id) = 0;
577 
582  virtual void removeObjects(const std::vector<size_t> &toRemove) = 0;
583 
590  virtual int removeObjects(const MDQuery&) = 0;
591  virtual int removeObjects() = 0;
592 
600  virtual size_t firstRowId() const = 0;
601  virtual size_t firstObject(const MDQuery&) const = 0;
602 
604  virtual size_t lastRowId() const = 0;
605 
614  virtual void findObjects(std::vector<size_t> &objectsOut, const MDQuery &query) const = 0;
615  virtual void findObjects(std::vector<size_t> &objectsOut, int limit = -1) const = 0;
616 
617  virtual size_t countObjects(const MDQuery&) const = 0;
618  virtual bool containsObject(size_t objectId) const = 0;
619  virtual bool containsObject(const MDQuery&) const = 0;
620 
627  virtual void _writeRows(std::ostream &os) const = 0;
628 
629  virtual void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const = 0;
630  virtual void write(std::ostream &os, const String & blockName="",WriteModeMetaData mode=MD_OVERWRITE) const = 0;
631  virtual void print() const { this->write(std::cout); }
632 
640  void append(const FileName &outFile) const;
641 
647  virtual void read(const FileName &inFile, const std::vector<MDLabel> *desiredLabels = nullptr, bool decomposeStack=true) = 0;
656  virtual void removeDisabled();
657 
659  virtual void selectRandomSubset(const MetaData &mdIn, size_t numberOfObjects,
660  const MDLabel sortLabel=MDL_OBJID) = 0;
661 
668  virtual void selectPart(const MetaData &mdIn, size_t startPosition, size_t numberOfObjects,
669  const MDLabel sortLabel=MDL_OBJID) = 0;
670 
676  friend struct MDBaseRowIterator<false>;
677  friend struct MDBaseRowIterator<true>;
678 
679  /* To allow iteration over abstract MetaData, there is a hierarchy of iterators.
680  - MDBaseRowIterator, MDVecRowIterator, MDDbRowIterator
681  - MDBaseIdIterator, MDVecIdIterator, MDDbIdIterator
682  * Plus there are rowIterator & idIterator, which are common for all instances
683  * and just hold unique_ptr to appropriate implementation.
684  */
685  template <bool IsConst>
686  struct rowIterator {
687  private:
688  std::unique_ptr<MDBaseRowIterator<IsConst>> impl;
689  public:
690  rowIterator(std::unique_ptr<MDBaseRowIterator<IsConst>> impl) : impl(std::move(impl)) {}
691  rowIterator(rowIterator const& right) : impl(std::move(right.impl->clone())) {}
693  impl = std::move(right.impl->clone());
694  return *this;
695  }
697  impl->increment();
698  return *this;
699  }
700  bool operator==(const rowIterator<IsConst>& other) const { return *(other.impl) == *(this->impl); }
701  bool operator!=(const rowIterator<IsConst>& other) const { return !(*this == other); }
703  };
704 
707 
708  virtual iterator begin() = 0;
709  virtual iterator end() = 0;
710 
711  virtual const_iterator begin() const = 0;
712  virtual const_iterator end() const = 0;
713 
714 
715  template <bool IsConst>
716  struct idIterator {
717  private:
718  std::unique_ptr<MDBaseIdIterator<IsConst>> impl;
719  public:
720  idIterator(std::unique_ptr<MDBaseIdIterator<IsConst>> impl) : impl(std::move(impl)) {}
721  idIterator(idIterator const& right) : impl(std::move(right.impl->clone())) {}
722  idIterator& operator=(idIterator const& right) {
723  impl = std::move(right.impl->clone());
724  return *this;
725  }
727  impl->increment();
728  return *this;
729  }
730  bool operator==(const idIterator<IsConst>& other) const { return *(other.impl) == *(this->impl); }
731  bool operator!=(const idIterator<IsConst>& other) const { return !(*this == other); }
732  size_t operator*() const { return **impl; }
733  };
734 
737 
738  // This proxy allows to implement non-const & const iterator in one templated class.
739  template <bool IsConst>
742 
746  };
747 
748  virtual id_iterator id_begin() = 0;
749  virtual id_iterator id_end() = 0;
750 
751  virtual id_const_iterator id_begin() const = 0;
752  virtual id_const_iterator id_end() const = 0;
753 
754  virtual IdIteratorProxy<false> ids() { return IdIteratorProxy<false>(*this); };
755  virtual IdIteratorProxy<true> ids() const { return IdIteratorProxy<true>(*this); };
756 
761  virtual void fillConstant(MDLabel label, const String &value) = 0;
762 
770  virtual void fillRandom(MDLabel label, const String &mode, double op1, double op2, double op3=0.) = 0;
771 
773  virtual void fillLinear(MDLabel label, double initial, double step) = 0;
774 
778  virtual void copyColumn(MDLabel labelDest, MDLabel labelSrc) = 0;
779 
781  virtual void copyColumnTo(MetaData& md, MDLabel labelDest, MDLabel labelSrc) = 0;
782 
786  virtual void renameColumn(MDLabel oldLabel, MDLabel newLabel) = 0;
787 
791  virtual void renameColumn(const std::vector<MDLabel> &oldLabel,
792  const std::vector<MDLabel> &newLabel) = 0;
793 
794  virtual bool nextBlock(mdBuffer &buffer, mdBlock &block);
795 };//class MetaData
796 
800 std::ostream& operator<<(std::ostream& o, const MetaData & mD);
801 
808 
809 bool vectorContainsLabel(const std::vector<MDLabel>& labelsVector, const MDLabel label);
810 
811 #endif
ColumnDoesNotExist(const std::string &msg)
object id (int), NOTE: This label is special and shouldn't be used
Argument missing.
Definition: xmipp_error.h:114
virtual void setMaxRows(size_t maxRows=0)
virtual bool isColumnFormat() const
size_t operator*() const
char * begin
Definition: metadata_base.h:82
virtual void setPrecission(int _precision)
void getColumnValues(const MDLabel label, std::vector< T > &out) const
const int & getValue2(int) const
void write(std::ostream &os, const datablock &db)
Definition: cif2pdb.cpp:3747
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
idIterator & operator++()
ObjectDoesNotExist(size_t id, const FileName &fn)
char * end
Definition: metadata_base.h:96
HBITMAP buffer
Definition: svm-toy.cpp:37
idIterator(std::unique_ptr< MDBaseIdIterator< IsConst >> impl)
std::ostream & operator<<(std::ostream &o, const MetaData &mD)
rowIterator & operator++()
T getValueOrDefault(const MDLabel label, size_t id, T &_default)
virtual IdIteratorProxy< false > ids()
void getValueOrAbort(const MDLabel label, T &valueOut, size_t id) const
bool isMetadataFile
void getBlocksInMetaDataFile(const FileName &inFile, StringVector &blockList)
FileName _inFile
String _comment
std::vector< String > StringVector
Definition: xmipp_strings.h:35
#define i
virtual void setComment(const String &newComment="No comment")
Incorrect number of objects in Metadata.
Definition: xmipp_error.h:160
virtual void print() const
TypeHelpers::choose< IsConst, const MDRow &, MDRow & >::type operator*() const
const T & getValueOrAbort(const MDLabel label, size_t id) const
ObjectDoesNotExist(MDLabel label, size_t id)
rowIterator(std::unique_ptr< MDBaseRowIterator< IsConst >> impl)
TypeHelpers::choose< IsConst, const MetaData &, MetaData & >::type _md
ColumnDoesNotExist(MDLabel label, const FileName &fn)
virtual bool isEmpty() const
idIterator(idIterator const &right)
bool operator==(const rowIterator< IsConst > &other) const
std::map< String, size_t > _fastStringSearch
T getValue(const MDLabel label, size_t id) const
TypeHelpers::choose< IsConst, MetaData::id_const_iterator, MetaData::id_iterator >::type begin()
virtual void setFilename(const FileName &_filename)
bool vectorContainsLabel(const std::vector< MDLabel > &labelsVector, const MDLabel label)
void getValueOrDefault(const MDLabel label, T &valueOut, size_t id, const T1 &_default) const
void mode
idIterator & operator=(idIterator const &right)
size_t nameSize
Definition: metadata_base.h:95
char * loop
Definition: metadata_base.h:97
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
bool operator!=(const rowIterator< IsConst > &other) const
std::vector< T > getColumnValues(const MDLabel label) const
bool setValueCol(const MDLabel label, const T &valueIn)
virtual size_t getParsedLines()
char * begin
Definition: metadata_base.h:94
const T getValueOrDefault(const MDLabel label, size_t id, const T &_default) const
IdIteratorProxy(typename TypeHelpers::choose< IsConst, const MetaData &, MetaData &>::type md)
bool setValue(const MDLabel label, const T &valueIn, size_t id)
virtual String getComment() const
virtual void setColumnFormat(bool column)
MetaData & operator=(const MetaData &md)
char isEmpty(const char *line)
Definition: tools.cpp:165
std::string String
Definition: xmipp_strings.h:34
rowIterator(rowIterator const &right)
bool operator==(const idIterator< IsConst > &other) const
bool operator!=(const idIterator< IsConst > &other) const
bool getValue(const MDLabel label, T &valueOut, size_t id) const
rowIterator & operator=(rowIterator const &right)
void setColumnValues(const MDLabel label, const std::vector< T > &valuesIn)
MDLabel _fastStringSearchLabel
static String label2Str(const MDLabel &label)
T & getValueOrAbort(const MDLabel label, size_t id)
TypeHelpers::choose< IsConst, MetaData::id_const_iterator, MetaData::id_iterator >::type end()
bool existsBlockInMetaDataFile(const FileName &inFile, const String &inBlock)
std::string to_string(bond_type bondType)
Definition: compound.cpp:43
file read(std::istream &is)
Definition: pdb2cif.cpp:6200
size_t size
Definition: metadata_base.h:83
WriteModeMetaData
int * n
NotImplemented(const std::string &str)
MDLabel
WriteModeMetaData metadataModeConvert(String mode)
virtual FileName getFilename() const
virtual IdIteratorProxy< true > ids() const
#define BAD_OBJID
Definition: metadata_base.h:55