Xmipp  v3.23.11-Nereus
metadata_vec.cpp
Go to the documentation of this file.
1 /**************************************************************************
2  *
3  * Authors: J.R. Bilbao-Castro (jrbcast@ace.ual.es)
4  * Jan Horacek (xhorace4@fi.muni.cz)
5  *
6  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21  * 02111-1307 USA
22  *
23  * All comments concerning this program package may be sent to the
24  * e-mail address 'xmipp@cnb.csic.es'
25  ***************************************************************************/
26 
27 #include <algorithm>
28 #include <cassert>
29 #include <fstream>
30 #include <random>
31 #include "metadata_vec.h"
32 #include "metadata_generator.h"
33 #include "xmipp_image.h"
34 
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 #ifdef XMIPP_MMAP
38 #include <sys/mman.h>
39 #endif
40 #include "xmipp_funcs.h"
41 
43  init({});
44 }
45 
46 MetaDataVec::MetaDataVec(const std::vector<MDLabel> &labelsVector) {
47  init(labelsVector);
48 }
49 
50 MetaDataVec::MetaDataVec(const FileName &fileName, const std::vector<MDLabel> &desiredLabels) {
51  init(desiredLabels);
52  read(fileName);
53 }
54 
56  init({});
57  read(fileName);
58 }
59 
61  init({});
63 }
64 
65 
66 void MetaDataVec::init(const std::vector<MDLabel> &labelsVector) {
67  this->clear();
69  _col_to_label.push_back(MDL_OBJID);
70  size_t col = 1;
71  for (const auto label : labelsVector) {
72  if (label != MDL_OBJID) {
73  _label_to_col[label] = col;
74  _col_to_label.push_back(label);
75  col++;
76  }
77  }
78  _no_columns = col;
79 }
80 
82  return this->_label_to_col[label];
83 }
84 
85 const MDObject& MetaDataVec::_getObject(size_t i, MDLabel label) const {
86  return this->_getObject(this->_rows.at(i), label);
87 }
88 
90  return this->_getObject(this->_rows.at(i), label);
91 }
92 
93 const MDObject& MetaDataVec::_getObject(const MetaDataVecRow& row, MDLabel label) const {
94  int labelIndex = this->_labelIndex(label);
95  if ((labelIndex < 0) || (static_cast<size_t>(labelIndex) >= row.size()))
96  throw ColumnDoesNotExist(label, getFilename());
97  return row.at(labelIndex);
98 }
99 
101  int labelIndex = this->_labelIndex(label);
102  if (labelIndex < 0)
103  throw ColumnDoesNotExist(label, getFilename());
104  return row.at(labelIndex);
105 }
106 
107 int MetaDataVec::_rowIndex(size_t id) const {
108  if (this->_id_to_index.find(id) == this->_id_to_index.end())
109  return -1;
110  return this->_id_to_index.at(id);
111 }
112 
113 size_t MetaDataVec::_rowIndexSafe(size_t id) const {
114  int i = this->_rowIndex(id);
115  if (i == -1)
116  throw ObjectDoesNotExist(id, getFilename());
117  return i;
118 }
119 
120 void MetaDataVec::read(const FileName &filename, const std::vector<MDLabel> *desiredLabels, bool decomposeStack) {
121  String blockName;
122  FileName inFile;
123 
124  blockName = filename.getBlockName();
125  inFile = filename.removeBlockName();
126  String extFile = filename.getExtension();
127  blockName = escapeForRegularExpressions(blockName);
128 
129  this->clear();
130  this->setColumnFormat(true);
131 
132  if (extFile == "xml")
133  this->readXML(_inFile, desiredLabels, blockName, decomposeStack);
134  else if (extFile == "sqlite")
135  throw NotImplemented("Reading from .sqlite file into MetaDataVec not implemented!");
136  else
137  this->readStar(filename, desiredLabels, blockName, decomposeStack);
138 }
139 
140 void MetaDataVec::write(const FileName &outFile, WriteModeMetaData mode) const {
141  String blockName;
142  FileName extFile;
143  FileName _outFile;
144 
145  blockName = outFile.getBlockName();
146  if (blockName.empty())
147  blockName = DEFAULT_BLOCK_NAME;
148  _outFile = outFile.removeBlockName();
149  extFile = outFile.getExtension();
150 
151  if (extFile == "xml") {
152  writeXML(_outFile, blockName, mode);
153  } else if (extFile == "sqlite") {
154  throw NotImplemented("Writing to .sqlite file from MetaDataVec not implemented!");
155  } else {
156  writeStar(_outFile, blockName, mode);
157  }
158 }
159 
160 void MetaDataVec::writeXML(const FileName fn, const FileName blockname, WriteModeMetaData mode) const {
161  // FIXME: implement
162  throw NotImplemented("writeXML not implemented");
163 }
164 
165 void MetaDataVec::writeText(const FileName fn, const std::vector<MDLabel>* desiredLabels) const {
166  // FIXME: implement
167  throw NotImplemented("writeText not immplemented");
168 }
169 
171  MetaData::clear();
172  this->_rows.clear();
173  std::fill(this->_label_to_col.begin(), this->_label_to_col.end(), -1);
174  this->_col_to_label.clear();
175  this->_id_to_index.clear();
176  this->_no_columns = 0;
177  this->_next_id = 1;
178 }
179 
180 void MetaDataVec::_setRow(const MDRow &row, size_t index) {
181  if (dynamic_cast<const MDRowVec*>(&row) != nullptr) {
182  // No not change same row
183  const MDRowVec& mdRowVec = dynamic_cast<const MDRowVec&>(row);
184  if ((mdRowVec._in_metadata) && (mdRowVec._rowi == index) && (mdRowVec._row == &this->_rows[index]))
185  return;
186  }
187 
188  size_t newRowSize = 0;
189  for (size_t labeli = 0; labeli < MDL_LAST_LABEL; ++labeli) {
190  MDLabel label = static_cast<MDLabel>(labeli);
191  if (row.containsLabel(label)) {
192  if (!this->containsLabel(label))
193  newRowSize = this->size();
194  else if (this->_label_to_col[label]+1 > newRowSize)
195  newRowSize = this->_label_to_col[label]+1;
196  }
197  }
198 
199  for (size_t column = 0; (column < this->_col_to_label.size()) && (column < newRowSize); ++column)
200  if ((this->_col_to_label[column] != MDL_OBJID) && (!row.containsLabel(this->_col_to_label[column])))
201  throw ColumnDoesNotExist("New row does not contain required MetaData column: "+
202  MDL::label2Str(this->_col_to_label[column])+"!");
203 
204  for (size_t labeli = 0; labeli < MDL_LAST_LABEL; ++labeli) {
205  MDLabel label = static_cast<MDLabel>(labeli);
206  if (row.containsLabel(label) && !this->containsLabel(label))
207  this->addLabel(label);
208  }
209 
210  MetaDataVecRow& editRow = this->_rows[index];
211  editRow.clear();
212 
213  for (size_t coli = 0; coli < this->_no_columns; coli++)
214  editRow.push_back({this->_col_to_label[coli]});
215 
216  for (size_t labeli = 0; labeli < MDL_LAST_LABEL; ++labeli) {
217  MDLabel label = static_cast<MDLabel>(labeli);
218  if (row.containsLabel(label)) {
219  size_t ourCol = _label_to_col[label];
220  row.getValue(editRow[ourCol]);
221  }
222  }
223 }
224 
225 size_t MetaDataVec::addRow(const MDRow &row) {
226  /* Id:
227  * When ‹row› does not contain MDL_OBJID column, it is created from ‹this->_nextId›
228  * When ‹row› contains id which is NOT present in Metadata, id is kept.
229  * When ‹row› contains id which IS present in Metadata, id is changed from ‹this->_nextId›.
230  * When ‹this->_nextId› is present in MetaData, assert fails. This should not happen.
231  */
232 
233  MetaDataVecRow newRow;
234  _rows.emplace_back(newRow);
235  this->_setRow(row, _rows.size()-1);
236 
237  if ((!row.containsLabel(MDL_OBJID)) || (row.getValue<size_t>(MDL_OBJID) == BAD_OBJID)) {
238  MetaDataVecRow& _row = this->_rows[_rows.size()-1];
239  if (!this->containsLabel(MDL_OBJID))
240  this->addLabel(MDL_OBJID);
241  this->_expand(_row, MDL_OBJID);
242  _row[this->_labelIndex(MDL_OBJID)] = MDObject(MDL_OBJID, this->_next_id);
243  }
244 
245  size_t rowId = getRowId(_rows.size()-1);
246 
247  if (this->_id_to_index.find(rowId) != this->_id_to_index.end()) {
248  MetaDataVecRow& _row = this->_rows[_rows.size()-1];
249  _row[this->_labelIndex(MDL_OBJID)] = MDObject(MDL_OBJID, this->_next_id);
250  rowId = this->_next_id;
251  }
252 
253  assert(this->_id_to_index.find(rowId) == this->_id_to_index.end());
254 
255  if (rowId >= this->_next_id)
256  this->_next_id = rowId+1;
257  this->_id_to_index[rowId] = _rows.size()-1;
258  return rowId;
259 }
260 
261 void MetaDataVec::addRows(const std::vector<MDRowVec> &rows) {
262  for (const auto row : rows)
263  this->addRow(row);
264 }
265 
266 int MetaDataVec::getMaxStringLength(const MDLabel thisLabel) const {
267  return 255;
268 }
269 
270 bool MetaDataVec::setValueCol(const MDObject &mdValueIn) {
271  const auto &label = mdValueIn.label;
272  int labelIndex = this->_labelIndex(label);
273  if (labelIndex < 0) {
274  this->addLabel(label);
275  labelIndex = this->_labelIndex(label);
276  for (auto &r : _rows) _expand(r, labelIndex);
277  }
278  for (auto &r : _rows) r[labelIndex] = mdValueIn;
279  return true;
280 }
281 
282 bool MetaDataVec::setValue(const MDObject &mdValueIn, size_t id) {
283  if (!this->containsLabel(mdValueIn.label))
284  this->addLabel(mdValueIn.label);
285 
286  MetaDataVecRow& row = this->_rows[this->_rowIndexSafe(id)];
287  this->_expand(row, mdValueIn.label);
288 
289  row[this->_labelIndex(mdValueIn.label)] = mdValueIn;
290  return true;
291 }
292 
293 bool MetaDataVec::getValue(MDObject &mdValueOut, size_t id) const {
294  try {
295  mdValueOut = this->_getObject(this->_rowIndexSafe(id), mdValueOut.label);
296  } catch (const ColumnDoesNotExist&) {
297  return false;
298  }
299  return true;
300 }
301 
303  return this->_getObject(this->_rowIndexSafe(id), label);
304 }
305 
306 const MDObject &MetaDataVec::getValue(MDLabel label, size_t id) const {
307  return this->_getObject(this->_rowIndexSafe(id), label);
308 }
309 
310 std::unique_ptr<MDRow> MetaDataVec::getRow(size_t id) {
311  int i = this->_rowIndex(id);
312  if (i < 0)
313  return nullptr;
314  return memoryUtils::make_unique<MDRowVec>(
315  this->_rows[i], i, this->_label_to_col, this->_col_to_label, this->_no_columns
316  );
317 }
318 
319 std::unique_ptr<const MDRow> MetaDataVec::getRow(size_t id) const {
320  int i = this->_rowIndex(id);
321  if (i < 0)
322  return nullptr;
323  return memoryUtils::make_unique<MDRowVec>(
324  this->_rows[i], i, this->_label_to_col, this->_col_to_label, this->_no_columns
325  );
326 }
327 
329  size_t i = this->_rowIndexSafe(id);
330  return MDRowVec(this->_rows[i], i, this->_label_to_col, this->_col_to_label, this->_no_columns);
331 }
332 
333 const MDRowVec MetaDataVec::getRowVec(size_t id) const {
334  size_t i = this->_rowIndexSafe(id);
335  return MDRowVec(this->_rows[i], i, this->_label_to_col, this->_col_to_label, this->_no_columns);
336 }
337 
338 void MetaDataVec::getRow(MDRowVec &row, size_t id) {
339  size_t i = this->_rowIndexSafe(id);
340  row = MDRowVec(this->_rows[i], i, this->_label_to_col, this->_col_to_label, this->_no_columns);
341 }
342 
343 bool MetaDataVec::getRowValues(size_t id, std::vector<MDObject> &values) const {
344  int i = this->_rowIndex(id);
345  if (i < 0)
346  return false;
347  values = this->_rows[i];
348  return true;
349 }
350 
351 size_t MetaDataVec::getRowId(size_t i) const {
352  return this->_getObject(i, MDL_OBJID).getValue2(size_t());
353 }
354 
355 size_t MetaDataVec::getRowId(const MetaDataVecRow& row) const {
356  int labelIndex = _labelIndex(MDL_OBJID);
357  if (labelIndex < 0)
359  return row.at(labelIndex).getValue2(size_t());
360 }
361 
362 void MetaDataVec::getColumnValues(const MDLabel label, std::vector<MDObject> &valuesOut) const {
363  valuesOut.clear();
364  int labelIndex = this->_labelIndex(label);
365  if (labelIndex < 0)
366  throw ColumnDoesNotExist(label, getFilename());
367  for (const auto& vecRow : this->_rows)
368  valuesOut.emplace_back(vecRow.at(labelIndex));
369 }
370 
371 void MetaDataVec::setColumnValues(const std::vector<MDObject> &valuesIn) {
372  for (size_t i = 0; i < std::min(valuesIn.size(), this->_rows.size()); i++) {
373  int labelIndex = this->_labelIndex(valuesIn[i].label);
374  if (labelIndex < 0)
375  this->addLabel(valuesIn[i].label);
376  labelIndex = this->_labelIndex(valuesIn[i].label);
377  if (labelIndex < 0)
378  throw ColumnDoesNotExist(valuesIn[i].label, getFilename());
379  this->_expand(this->_rows[i], valuesIn[i].label);
380  this->_rows[i][labelIndex] = valuesIn[i];
381  }
382 }
383 
384 bool MetaDataVec::setRow(const MDRow &row, size_t id) {
385  this->_setRow(row, this->_rowIndexSafe(id));
386  return true;
387 }
388 
389 bool MetaDataVec::isEmpty() const {
390  return this->_rows.empty();
391 }
392 
393 size_t MetaDataVec::size() const {
394  return this->_rows.size();
395 }
396 
397 bool MetaDataVec::containsLabel(const MDLabel label) const {
398  return this->_labelIndex(label) > -1;
399 }
400 
401 bool MetaDataVec::addLabel(const MDLabel label, int pos) {
402  if (pos != -1)
403  throw NotImplemented("addLabel to -1 not implemented");
404  if (this->_label_to_col[label] != -1)
405  return true;
406 
407  this->_no_columns++;
408  size_t column = this->_no_columns-1;
409  this->_label_to_col[label] = column;
410  this->_col_to_label.emplace_back(label);
411  return true;
412 }
413 
415  if (this->_label_to_col[label] == -1)
416  return false;
417  int column = this->_label_to_col[label];
418 
419  for (auto& vecRow : this->_rows)
420  if (static_cast<int>(vecRow.size()) > column)
421  vecRow.erase(vecRow.begin()+column); // this is expensive
422 
423  // FIXME: test this properly
424  this->_label_to_col[label] = -1;
425  for (size_t i = 0; i < MDL_LAST_LABEL; i++) {
426  if (this->_label_to_col[i] > column) {
427  this->_label_to_col[i]--;
428  this->_col_to_label[this->_label_to_col[i]] = MDLabel(i);
429  }
430  }
431  this->_no_columns--;
432  return true;
433 }
434 
436  MDRowVec row;
437  row.setValue(MDObject(MDL_OBJID, this->_next_id));
438  return this->addRow(row);
439 }
440 
441 void MetaDataVec::importObject(const MetaData &md, const size_t id, bool doClear) {
442  if (doClear) {
443  this->clear();
444  this->copyInfo(md);
445  }
446 
447  std::unique_ptr<const MDRow> row = md.getRow(id);
448  if (row == nullptr)
449  return;
450 
451  MDRowVec newRow;
452  for (const MDObject* obj : *row)
453  if (obj->label != MDL_OBJID)
454  newRow.setValue(*obj);
455  this->addRow(newRow);
456 }
457 
458 void MetaDataVec::importObjects(const MetaData &md, const std::vector<size_t> &objectsToAdd, bool doClear) {
459  if (doClear) {
460  this->clear();
461  this->copyInfo(md);
462  }
463 
464  for (size_t objId : objectsToAdd)
465  this->importObject(md, objId, false);
466 }
467 
468 void MetaDataVec::importObjects(const MetaData &md, const MDQuery &query, bool doClear) {
469  // FIXME: move this to MetaData?
470  std::vector<size_t> ids;
471  md.findObjects(ids, query);
472  this->importObjects(md, ids, doClear);
473 }
474 
475 bool MetaDataVec::removeObject(size_t id) {
476  int i = this->_rowIndex(id);
477  if (i < 0)
478  return false;
479 
480  this->_id_to_index.erase(id);
481  this->_rows.erase(this->_rows.begin()+i);
482 
483  for (size_t j = i; j < this->_rows.size(); j++)
484  this->_id_to_index[this->getRowId(j)]--;
485 
486  return true;
487 }
488 
489 void MetaDataVec::removeObjects(const std::vector<size_t> &toRemove) {
490  for (size_t id : toRemove)
491  this->removeObject(id);
492 }
493 
495  size_t count = this->size();
496  this->clear();
497  return count;
498 }
499 
501  // FIXME: move this to MetaData?
502  std::vector<size_t> ids;
503  this->findObjects(ids, query);
504  this->removeObjects(ids);
505  return true;
506 }
507 
508 size_t MetaDataVec::firstRowId() const {
509  return this->getRowId(0);
510 }
511 
512 size_t MetaDataVec::lastRowId() const {
513  return this->getRowId(this->size()-1);
514 }
515 
516 bool MetaDataVec::_match(const MetaDataVecRow& row, const MDQuery& query) const {
517  if (dynamic_cast<const MDMultiQuery*>(&query) != nullptr) {
518  // Process MDMultiQuery
519  const MDMultiQuery& mq = dynamic_cast<const MDMultiQuery&>(query);
520  if (mq.operations.size() == 0)
521  return true; // emptuy query matches always
522  String operation = mq.operations[0];
523  for (const auto& op : mq.operations)
524  assert(op == operation); // support only all same operations
525  for (const auto& q : mq.queries) {
526  if ((operation == "AND") && (!this->_match(row, *q)))
527  return false;
528  else if ((operation == "OR") && (this->_match(row, *q)))
529  return true;
530  }
531  return (operation == "AND"); // false for "OR", true for "AND"
532  }
533 
534  if (dynamic_cast<const MDValueRelational*>(&query) == nullptr)
535  throw NotImplemented("_match for this type of query not implemented");
536  // MDValueRange, MDExpression, MDMultiQuery not implemented yet
537  // MDExpression will probably never be supported as it is raw SQL expression
538 
539  const MDValueRelational& rel = dynamic_cast<const MDValueRelational&>(query);
540 
541  if (rel.value == nullptr)
542  return false;
543 
544  size_t labeli = this->_labelIndex(rel.value->label);
545  if (labeli >= row.size())
546  return false;
547 
548  const MDObject& mdObj = row[labeli];
549 
550  if (rel.op == RelationalOp::EQ)
551  return *(rel.value) == mdObj;
552  if (rel.op == RelationalOp::NE)
553  return *(rel.value) != mdObj;
554  if (rel.op == RelationalOp::GT) // FIXME: check if < & > are not swapped
555  return *(rel.value) < mdObj;
556  if (rel.op == RelationalOp::GE)
557  return *(rel.value) <= mdObj;
558  if (rel.op == RelationalOp::LT)
559  return *(rel.value) > mdObj;
560  if (rel.op == RelationalOp::LE)
561  return *(rel.value) >= mdObj;
562 
563  throw std::logic_error("MetaDataVec::_match: unknown operator");
564 }
565 
566 size_t MetaDataVec::firstObject(const MDQuery& query) const {
567  // FIXME: should first be first in _rows order or ids order?
568  for (const MetaDataVecRow& row : this->_rows)
569  if (this->_match(row, query))
570  return this->getRowId(row);
571  return BAD_OBJID;
572 }
573 
574 void MetaDataVec::findObjects(std::vector<size_t> &objectsOut, const MDQuery &query) const {
575  // FIXME: should first be first in _rows order or ids order?
576  objectsOut.clear();
577  for (const MetaDataVecRow& row : this->_rows)
578  if (this->_match(row, query))
579  objectsOut.emplace_back(this->getRowId(row));
580 }
581 
582 void MetaDataVec::findObjects(std::vector<size_t> &objectsOut, int limit) const {
583  objectsOut.clear();
584  for (size_t i = 0; i < std::min<size_t>(limit, this->size()); i++)
585  objectsOut.emplace_back(this->getRowId(this->_rows[i]));
586 }
587 
588 size_t MetaDataVec::countObjects(const MDQuery& query) const {
589  size_t count = 0;
590  for (const MetaDataVecRow& row : this->_rows)
591  if (this->_match(row, query))
592  count++;
593  return count;
594 }
595 
596 bool MetaDataVec::containsObject(size_t objectId) const {
597  return this->_rowIndex(objectId) > -1;
598 }
599 
600 bool MetaDataVec::containsObject(const MDQuery& query) const {
601  for (const MetaDataVecRow& row : this->_rows)
602  if (this->_match(row, query))
603  return true;
604  return false;
605 }
606 
607 bool MetaDataVec::containsObject(size_t objectId) {
608  return this->_id_to_index.find(objectId) != this->_id_to_index.end();
609 }
610 
611 
612 void MetaDataVec::_writeRows(std::ostream &os) const {
613  for (const MetaDataVecRow& row : this->_rows) {
614  for (size_t i = 0; i < MDL_LAST_LABEL; i++) {
615  const MDLabel label = static_cast<MDLabel>(i);
616  if ((label != MDL_STAR_COMMENT) && (label != MDL_OBJID) && (this->_label_to_col[i] > -1)) {
617  os.width(1);
618  if (this->_labelIndex(label) < static_cast<int>(row.size()))
619  this->_getObject(row, label).toStream(os, true);
620  else
621  throw ColumnDoesNotExist(label, getFilename());
622  os << " ";
623  }
624  }
625  os << '\n';
626  }
627 }
628 
629 void MetaDataVec::write(std::ostream &os, const String &blockName, WriteModeMetaData mode) const {
630  if (mode==MD_OVERWRITE)
631  os << FileNameVersion << " * "// << (isColumnFormat ? "column" : "row")
632  << '\n' // write which type of format (column or row) and the path;
633  << WordWrap(this->_comment, line_max); // write md comment in the 2nd comment line of header
634 
635  // write data block
636  String _szBlockName("data_");
637  _szBlockName += blockName;
638 
639  if (this->isColumnFormat()) {
640  // write md columns in 3rd comment line of the header
641  os << _szBlockName << '\n';
642  os << "loop_" << '\n';
643  for (size_t i = 0; i < MDL_LAST_LABEL; i++)
644  if ((i != MDL_STAR_COMMENT) && (i != MDL_OBJID) && (this->_label_to_col[i] > -1))
645  os << " _" << MDL::label2Str(static_cast<MDLabel>(i)) << '\n';
646  _writeRows(os);
647 
648  //Put the activeObject to the first, if exists
649  } else { // row format
650  os << _szBlockName << '\n';
651 
652  // Print single object
653  assert(this->_rows.size() == 1);
654 
655  for (size_t i = 0; i < MDL_LAST_LABEL; i++) {
656  const MDLabel label = static_cast<MDLabel>(i);
657  if ((label != MDL_STAR_COMMENT) && (label != MDL_OBJID) && (this->_label_to_col[i] > -1)) {
658  os << " _" << MDL::label2Str(label) << " ";
659  if (this->_labelIndex(label) < static_cast<int>(this->_rows[0].size()))
660  this->_getObject(this->_rows[0], label).toStream(os);
661  else
662  throw ColumnDoesNotExist(label, getFilename());
663  os << '\n';
664  }
665  }
666  }
667 }
668 
669 void MetaDataVec::_parseObjects(std::istream &is, std::vector<MDObject*> &columnValues,
670  const std::vector<MDLabel> *desiredLabels, bool firstTime) {
671  for (size_t i = 0; i < columnValues.size(); i++) {
672  columnValues[i]->fromStream(is);
673 
674  if (is.fail()) {
675  String errorMsg = formatString("MetaData: Error parsing column '%s' value.",
676  MDL::label2Str(columnValues[i]->label).c_str());
677  columnValues[i]->failed = true;
678  std::cerr << "WARNING: " << errorMsg << std::endl;
679  //REPORT_ERROR(ERR_MD_BADLABEL, (String)"read: Error parsing data column, expecting " + MDL::label2Str(object.label));
680  } else if (firstTime) {
681  // Check if current column label exists.
682  if (columnValues[i]->label != MDL_UNDEFINED) {
683  // If there are no desired labels then add all.
684  bool reallyAdd=false;
685  if (desiredLabels == NULL) {
686  reallyAdd = true;
687  } else {
688  // Check if current column belongs to desired labels.
689  for (size_t j = 0; j < desiredLabels->size(); ++j) {
690  if ((*desiredLabels)[j] == columnValues[i]->label) {
691  reallyAdd = true;
692  break;
693  }
694  }
695  }
696 
697  // Add label if not exists.
698  if (reallyAdd)
699  this->addLabel(columnValues[i]->label);
700  }
701  }
702  }
703 
704  // Insert elements in DB.
705  MDRowVec newRow;
706  for (size_t i = 0; i < columnValues.size(); i++)
707  if (columnValues[i] != nullptr)
708  newRow.setValue(*columnValues[i]);
709  this->addRow(newRow);
710 }
711 
712 void MetaDataVec::readXML(const FileName &inFile, const std::vector<MDLabel> *desiredLabels, const String & blockRegExp, bool decomposeStack) {
713  REPORT_ERROR(ERR_NOT_IMPLEMENTED, "readXML not implemented yet");
714 }
715 
716 void MetaDataVec::readPlain(const FileName &inFile, const String &labelsString, const String &separator) {
717  // TODO
718  throw NotImplemented("readPlain not implemented");
719 }
720 
721 void MetaDataVec::addPlain(const FileName &inFile, const String &labelsString, const String &separator) {
722  // TODO
723  throw NotImplemented("addPlain not implemented");
724 }
725 
727  // TODO
728  throw NotImplemented("getColumnMax not implemented");
729 }
730 
732  // TODO
733  throw NotImplemented("getColumnMin not implemented");
734 }
735 
736 void MetaDataVec::replace(const MDLabel label, const String &oldStr, const String &newStr) {
737  // TODO
738  throw NotImplemented("replace not implemented");
739 }
740 
741 void MetaDataVec::randomize(const MetaData &MDin) {
742  *this = MDin;
743  std::random_device rd;
744  auto g = std::mt19937(rd());
745  std::shuffle(this->_rows.begin(), this->_rows.end(), g);
746  this->_recalc_id_to_index();
747 }
748 
750  this->_id_to_index.clear();
751  for (size_t i = 0; i < this->_rows.size(); i++)
752  this->_id_to_index[this->getRowId(i)] = i;
753 }
754 
756  *this = MDin; // FIXME: maybe join?
757  std::vector<MetaDataVecRow> new_rows;
758  for (const auto& row : this->_rows) {
759  if (!this->_contains(new_rows, row))
760  new_rows.emplace_back(row);
761  }
762  this->_rows = new_rows;
763 }
764 
765 bool MetaDataVec::_contains(const std::vector<MetaDataVecRow>& rows, const MetaDataVecRow& row) const {
766  for (const auto& _row : rows)
767  if (this->_rowsEq(row, _row))
768  return true;
769  return false;
770 }
771 
773  for (size_t label = 0; label < MDL_LAST_LABEL; label++) {
774  if ((label == MDL_COMMENT) || (label == MDL_OBJID))
775  continue;
776 
777  int labeli = this->_labelIndex(static_cast<MDLabel>(label));
778  if (labeli > -1) { // label is active
779  if ((static_cast<size_t>(labeli) < a.size()) !=
780  (static_cast<size_t>(labeli) < b.size()))
781  return false; // item present in one row, but not other
782  if (static_cast<size_t>(labeli) >= a.size())
783  continue; // label not present in both rows
784  if (!a[labeli].eq(b[labeli], this->precision()))
785  return false; // MDObjects are diffrent
786  }
787  }
788  return true;
789 }
790 
791 void MetaDataVec::sort(const MetaDataVec &MDin, const MDLabel sortLabel, bool asc, int limit, int offset) {
792  *this = MDin;
793 
794  int label_index = this->_labelIndex(sortLabel);
795  if (label_index > -1) {
796  std::sort(this->_rows.begin(), this->_rows.end(),
797  [label_index, asc](const MetaDataVecRow &a, const MetaDataVecRow &b) {
798  if (asc)
799  return a[label_index] < b[label_index];
800  return a[label_index] > b[label_index];
801  }
802  );
803 
804  this->_rows.erase(this->_rows.begin(), this->_rows.begin()+offset);
805  if ((limit > 0) && (limit < this->_rows.size()))
806  this->_rows.erase(this->_rows.begin()+limit, this->_rows.end());
807 
808  this->_recalc_id_to_index();
809 
810  }
811 }
812 
813 void MetaDataVec::sort(MetaDataVec &MDin, const String &sortLabel, bool asc, int limit, int offset) {
814  // TODO
815  throw NotImplemented("sort not implemented");
816 }
817 
818 void MetaDataVec::split(size_t parts, std::vector<MetaDataVec> &results, const MDLabel sortLabel) const {
819  if (parts > this->size())
820  REPORT_ERROR(ERR_MD, "MetaDataDb::split: Couldn't split a metadata in more parts than its size");
821 
822  MetaDataVec sorted;
823  sorted.sort(*this, sortLabel);
824 
825  results.clear();
826  results.resize(parts);
827  for (size_t i = 0; i < parts; i++) {
828  MetaDataVec &md = results[i];
829  size_t firsti, lasti;
830  divide_equally(sorted.size(), parts, i, firsti, lasti);
831  for (size_t j = firsti; j <= lasti; j++)
832  md.addRow(sorted.getRowVec(sorted.getRowId(j)));
833  }
834 }
835 
836 void MetaDataVec::selectRandomSubset(const MetaData &mdIn, size_t numberOfObjects, const MDLabel sortLabel) {
837  // TODO
838  throw NotImplemented("selectRandomSubset not implemented");
839 }
840 
841 void MetaDataVec::selectPart(const MetaData &mdIn, size_t startPosition, size_t numberOfObjects,
842  const MDLabel sortLabel) {
843  this->sort(mdIn, sortLabel, true, numberOfObjects, startPosition);
844 }
845 
846 /*void makeAbsPath(const MDLabel label=MDL_IMAGE);*/
847 
848 
849 void MetaDataVec::fillConstant(MDLabel label, const String &value) {
850  // FIXME: move to MetaData and use common MDGenerator?
851  MDConstGenerator generator(value);
852  generator.label = label;
853  generator.fill(*this);
854 }
855 
856 void MetaDataVec::fillRandom(MDLabel label, const String &mode, double op1, double op2, double op3) {
857  // FIXME: move to MetaData and use common MDGenerator?
858  MDRandGenerator generator(op1, op2, mode, op3);
859  generator.label = label;
860  generator.fill(*this);
861 }
862 
863 void MetaDataVec::fillLinear(MDLabel label, double initial, double step) {
864  // FIXME: move to MetaData and use common MDGenerator?
865  MDLinealGenerator generator(initial, step);
866  generator.label = label;
867  generator.fill(*this);
868 }
869 
870 void MetaDataVec::_expand(MetaDataVecRow& row, const MDLabel label) {
871  int labeli = this->_labelIndex(label);
872  if (labeli < 0)
873  this->addLabel(label);
874  this->_expand(row, this->_labelIndex(label));
875 }
876 
877 void MetaDataVec::_expand(MetaDataVecRow& row, size_t labeli) {
878  // In assert: all labels to labeli (including) must be present in
879  // this->_col_to_label.
880 
881  if (labeli < row.size())
882  return; // space for label already present
883 
884  for (size_t i = row.size(); i <= labeli; i++)
885  row.emplace_back(MDObject(this->_col_to_label.at(i)));
886 }
887 
888 void MetaDataVec::copyColumn(MDLabel labelDest, MDLabel labelSrc) {
889  int labelsrci = this->_labelIndex(labelSrc);
890  int labeldesti = this->_labelIndex(labelDest);
891  if (labelsrci < 0)
892  return;
893  if (labeldesti < 0)
894  this->addLabel(labelDest);
895  labeldesti = this->_labelIndex(labelDest);
896 
897  for (MetaDataVecRow& row : this->_rows) {
898  if (static_cast<size_t>(labeldesti) >= row.size())
899  this->_expand(row, labeldesti);
900 
901  if (static_cast<size_t>(labelsrci) < row.size()) {
902  row[labeldesti] = row[labelsrci];
903  row[labeldesti].label = labelDest;
904  }
905  // else row[labeldesti] is empty MDObject (from previous if)
906  }
907 }
908 
909 void MetaDataVec::copyColumnTo(MetaData& md, MDLabel labelDest, MDLabel labelSrc) {
910  // TODO
911  throw NotImplemented("copyColumnTo not implemented");
912 }
913 
914 void MetaDataVec::renameColumn(MDLabel oldLabel, MDLabel newLabel) {
915  assert(!this->containsLabel(newLabel));
916  int labeloldi = this->_labelIndex(oldLabel);
917  if (labeloldi < 0)
918  throw ColumnDoesNotExist(oldLabel, getFilename());
919 
920  this->_label_to_col[newLabel] = labeloldi;
921  this->_label_to_col[oldLabel] = -1;
922  this->_col_to_label[labeloldi] = newLabel;
923 
924  for (auto& row : this->_rows) {
925  if (labeloldi < static_cast<int>(row.size()))
926  row[labeloldi].label = newLabel;
927  }
928 }
929 
930 void MetaDataVec::renameColumn(const std::vector<MDLabel> &oldLabel,
931  const std::vector<MDLabel> &newLabel) {
932  // TODO
933  throw NotImplemented("renameColumn not implemented");
934 }
935 
936 
937 bool MetaDataVec::operator==(const MetaDataVec& op) const {
938  // This comparison ignores order of labels and row ids, everything else must be same.
939 
940  if (this->_rows.size() != op._rows.size())
941  return false;
942 
943  for (size_t labeli = 0; labeli < MDL_LAST_LABEL; labeli++)
944  if ((this->_label_to_col[labeli] > -1) != (op._label_to_col[labeli] > -1))
945  return false;
946 
947  for (size_t i = 0; i < this->_rows.size(); i++) {
948  for (size_t labeli = 0; labeli < MDL_LAST_LABEL; labeli++) {
949  if ((labeli == MDL_COMMENT) || (labeli == MDL_OBJID))
950  continue;
951 
952  int thisLabelColI = this->_label_to_col[labeli];
953  int opLabelColI = op._label_to_col[labeli];
954  if (thisLabelColI > -1) {
955  if ((static_cast<size_t>(thisLabelColI) < this->_rows[i].size()) !=
956  (static_cast<size_t>(opLabelColI) < op._rows[i].size()))
957  return false; // item present in one row, but not other
958  if (static_cast<size_t>(thisLabelColI) >= this->_rows[i].size())
959  continue; // label not present in both rows
960  if (!this->_rows[i][thisLabelColI].eq(op._rows[i][opLabelColI], this->precision()))
961  return false; // MDObjects are diffrent
962  }
963  }
964  }
965 
966  return true; // all rows same → ok
967 }
968 
969 std::vector<MDLabel> MetaDataVec::getActiveLabels() const {
970  std::vector<MDLabel> out;
971  for (size_t i = MDL_GATHER_ID; i < MDL_LAST_LABEL; i++) // ignore MDL_FIRST_LABEL = MDL_OBJID
972  if (this->_label_to_col[i] > -1)
973  out.emplace_back(static_cast<MDLabel>(i));
974  return out;
975 }
976 
977 std::ostream& operator<<(std::ostream& o, const MetaDataVec& md) {
978  md.write(o);
979  return o;
980 }
object id (int), NOTE: This label is special and shouldn't be used
virtual bool isColumnFormat() const
size_t lastRowId() const override
std::vector< String > operations
void init(const std::vector< MDLabel > &labelsVector)
std::vector< MDObject > MetaDataVecRow
Definition: metadata_vec.h:37
bool _match(const MetaDataVecRow &, const MDQuery &) const
void min(Image< double > &op1, const Image< double > &op2)
void removeDuplicates(MetaData &MDin, MDLabel label=MDL_UNDEFINED)
Case or algorithm not implemented yet.
Definition: xmipp_error.h:177
void sort(const MetaDataVec &MDin, const MDLabel sortLabel, bool asc=true, int limit=-1, int offset=0)
Not equal.
void setColumnValues(const std::vector< MDObject > &valuesIn) override
Less than.
const int & getValue2(int) const
void read(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=nullptr, bool decomposeStack=true) override
String getBlockName() const
virtual void clear()
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
size_t _no_columns
Definition: metadata_vec.h:51
virtual void writeStar(const FileName &outFile, const String &blockName, WriteModeMetaData mode) const
doublereal * g
void readXML(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=nullptr, const String &blockRegExp=DEFAULT_BLOCK_NAME, bool decomposeStack=true)
bool removeLabel(const MDLabel label) override
bool removeObject(size_t id) override
void writeText(const FileName fn, const std::vector< MDLabel > *desiredLabels) const override
void setValue(const MDObject &object) override
double getColumnMax(MDLabel column)
size_t divide_equally(size_t N, size_t size, size_t rank, size_t &first, size_t &last)
std::ostream & operator<<(std::ostream &o, const MetaDataVec &md)
int _rowIndex(size_t id) const
void replace(const MDLabel label, const String &oldStr, const String &newStr)
String WordWrap(const String &inputString, size_t lineLength)
void selectPart(const MetaData &mdIn, size_t startPosition, size_t numberOfObjects, const MDLabel sortLabel=MDL_OBJID) override
void split(size_t n, std::vector< MetaDataVec > &results, const MDLabel sortLabel=MDL_OBJID) const
size_t firstObject(const MDQuery &) const override
void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const
void addPlain(const FileName &inFile, const String &labelsString, const String &separator=" ")
virtual IdIteratorProxy< false > ids()
String FileNameVersion
std::unique_ptr< MDRow > getRow(size_t id) override
void randomize(const MetaData &MDin)
FileName _inFile
String _comment
size_t size() const override
#define i
size_t addRow(const MDRow &row) override
String getExtension() const
void clear() override
void addRows(const std::vector< MDRowVec > &rows)
bool isEmpty() const override
double getColumnMin(MDLabel column)
std::unordered_map< size_t, size_t > _id_to_index
Definition: metadata_vec.h:52
doublereal * b
MDLabel label
T & getValue(MDLabel label)
void _recalc_id_to_index()
size_t _next_id
Definition: metadata_vec.h:53
viol index
bool setValue(const MDObject &mdValueIn, size_t id)
Equal.
size_t addObject() override
void renameColumn(MDLabel oldLabel, MDLabel newLabel) override
void readPlain(const FileName &inFile, const String &labelsString, const String &separator=" ")
size_t firstRowId() const override
void importObjects(const MetaData &md, const std::vector< size_t > &objectsToAdd, bool doClear=true) override
std::array< int, MDL_LAST_LABEL > _label_to_col
Definition: metadata_vec.h:49
void _writeRows(std::ostream &os) const override
void findObjects(std::vector< size_t > &objectsOut, const MDQuery &query) const override
Greater equal.
void eq(Image< double > &op1, const Image< double > &op2)
Be careful with integer images for relational operations...due to double comparisons.
MetaData error.
Definition: xmipp_error.h:154
void _expand(MetaDataVecRow &, const MDLabel)
void copyInfo(const MetaData &md)
Greater than.
bool getRowValues(size_t id, std::vector< MDObject > &values) const override
virtual std::unique_ptr< MDRow > getRow(size_t id)=0
virtual void findObjects(std::vector< size_t > &objectsOut, const MDQuery &query) const =0
void mode
void fillConstant(MDLabel label, const String &value) override
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
void sort(struct DCEL_T *dcel)
Definition: sorting.cpp:18
int getMaxStringLength(const MDLabel thisLabel) const override
void importObject(const MetaData &md, const size_t id, bool doClear=true) override
int removeObjects() override
std::vector< MetaDataVecRow > _rows
Definition: metadata_vec.h:48
bool setValueCol(const MDObject &mdValueIn) override
#define j
size_t getRowId(const MetaDataVecRow &) const
bool getValue(MDObject &mdValueOut, size_t id) const override
bool containsObject(size_t objectId) const override
void selectRandomSubset(const MetaData &mdIn, size_t numberOfObjects, const MDLabel sortLabel=MDL_OBJID) override
bool addLabel(const MDLabel label, int pos=-1) override
void copyColumnTo(MetaData &md, MDLabel labelDest, MDLabel labelSrc) override
void getColumnValues(const MDLabel label, std::vector< MDObject > &valuesOut) const override
const MDObject & _getObject(size_t i, MDLabel label) const
virtual void setColumnFormat(bool column)
String escapeForRegularExpressions(const String &str)
MetaData & operator=(const MetaData &md)
double precision() const
FileName removeBlockName() const
void _setRow(const MDRow &row, size_t index)
virtual bool containsLabel(MDLabel label) const =0
int _labelIndex(MDLabel label) const
MDRowVec getRowVec(size_t id)
std::string String
Definition: xmipp_strings.h:34
#define line_max
comment is wrapped in char_max length lines
String formatString(const char *format,...)
void toStream(std::ostream &os, bool withFormat=false, bool isSql=false, bool escape=true) const
void fillLinear(MDLabel label, double initial, double step) override
bool _contains(const std::vector< MetaDataVecRow > &, const MetaDataVecRow &) const
void writeXML(const FileName fn, const FileName blockname, WriteModeMetaData mode) const override
void(* obj)()
static String label2Str(const MDLabel &label)
bool _rowsEq(const MetaDataVecRow &a, const MetaDataVecRow &b) const
Less equal.
void _parseObjects(std::istream &is, std::vector< MDObject *> &columnValues, const std::vector< MDLabel > *desiredLabels, bool firstTime) override
bool containsLabel(const MDLabel label) const override
virtual void readStar(const FileName &filename, const std::vector< MDLabel > *desiredLabels, const String &blockRegExp, bool decomposeStack)
void fillRandom(MDLabel label, const String &mode, double op1, double op2, double op3=0.) override
size_t _rowIndexSafe(size_t id) const
void copyColumn(MDLabel labelDest, MDLabel labelSrc) override
bool setRow(const MDRow &row, size_t id)
#define DEFAULT_BLOCK_NAME
Definition: metadata_base.h:60
WriteModeMetaData
std::vector< MDLabel > _col_to_label
Definition: metadata_vec.h:50
MDLabel
doublereal * a
bool operator==(const MetaDataVec &op) const
virtual FileName getFilename() const
size_t countObjects(const MDQuery &) const override
#define BAD_OBJID
Definition: metadata_base.h:55
Serve to make annotations on the metadata row.
std::vector< const MDQuery * > queries
std::vector< MDLabel > getActiveLabels() const override
A comment for this object /*** NOTE THIS IS A SPECIAL CASE AND SO IS TREATED ***/.
void fill(MetaData &md)