Xmipp  v3.23.11-Nereus
metadata_db.cpp
Go to the documentation of this file.
1 /**************************************************************************
2  *
3  * Authors: J.R. Bilbao-Castro (jrbcast@ace.ual.es)
4  * Jan Horacek (xhorace4@fi.muni.cz)
5  *
6  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21  * 02111-1307 USA
22  *
23  * All comments concerning this program package may be sent to the
24  * e-mail address 'xmipp@cnb.csic.es'
25  ***************************************************************************/
26 
27 #include <fstream>
28 #include <random>
29 #include <algorithm>
30 #include <cassert>
31 
32 #include "metadata_db.h"
33 #include "xmipp_image.h"
34 #include "metadata_sql.h"
35 #include "metadata_generator.h"
36 #include "xmipp_funcs.h"
37 
38 
39 //-----Constructors and related functions ------------
40 void MetaDataDb::_clear(bool onlyData)
41 {
42  if (onlyData)
43  {
44  myMDSql->deleteObjects();
45  }
46  else
47  {
49  _activeLabels.clear();
50  myMDSql->clearMd();
51  }
52 }//close clear
53 
55 {
56  init({});
57 }
58 
59 void MetaDataDb::init(const std::vector<MDLabel> &labelsVector)
60 {
61  _clear();
62  _maxRows = 0; //by default read all rows
63  _parsedLines = 0; //no parsed line;
64  _activeLabels = labelsVector;
65  //Create table in database
66  myMDSql->createMd();
67  _precision = 100;
68  isMetadataFile = false;
69 }//close init
70 
71 void MetaDataDb::copyMetadata(const MetaDataDb &md, bool copyObjects)
72 {
73  if (this == &md) //not sense to copy same metadata
74  return;
75  init(md._activeLabels);
76  copyInfo(md);
77  if (!md._activeLabels.empty())
78  {
79  if (copyObjects)
80  md.myMDSql->copyObjects(this);
81  }
82  else
83  {
84  int n = md.size();
85  for (int i = 0; i < n; i++)
86  addObject();
87  }
88 }
89 
90 bool MetaDataDb::setValue(const MDObject &mdValueIn, size_t id)
91 {
92  if (id == BAD_OBJID)
93  {
94  REPORT_ERROR(ERR_MD_NOACTIVE, "setValue: please provide objId other than -1");
95  exit(1);
96  }
97  //add label if not exists, this is checked in addlabel
98  addLabel(mdValueIn.label);
99  return myMDSql->setObjectValue(id, mdValueIn);
100 }
101 
102 bool MetaDataDb::setValueCol(const MDObject &mdValueIn)
103 {
104  //add label if not exists, this is checked in addlabel
105  addLabel(mdValueIn.label);
106  return myMDSql->setObjectValue(mdValueIn);
107 }
108 
109 bool MetaDataDb::getValue(MDObject &mdValueOut, size_t id) const
110 {
111  if (!containsLabel(mdValueOut.label))
112  return false;
113 
114  if (id == BAD_OBJID)
115  REPORT_ERROR(ERR_MD_NOACTIVE, "getValue: please provide objId other than -1");
116 
117  return myMDSql->getObjectValue(id, mdValueOut);
118 }
119 
120 void MetaDataDb::getColumnValues(const MDLabel label, std::vector<MDObject> &valuesOut) const
121 {
122  MDObject mdValueOut(label);
123  std::vector<size_t> objectsId;
124  findObjects(objectsId);
125  size_t n = objectsId.size();
126  valuesOut.resize(n,mdValueOut);
127  for (size_t i = 0; i < n; ++i)
128  {
129  getValue(mdValueOut, objectsId[i]);
130  valuesOut[i] = mdValueOut;
131  }
132 }
133 
134 void MetaDataDb::setColumnValues(const std::vector<MDObject> &valuesIn)
135 {
136  bool addObjects=false;
137  if (size()==0)
138  addObjects=true;
139  if (valuesIn.size()!=size() && !addObjects)
140  REPORT_ERROR(ERR_MD_OBJECTNUMBER,"Input vector must be of the same size as the metadata");
141  if (!addObjects)
142  {
143  size_t n = 0;
144  for (size_t objId : this->ids())
145  setValue(valuesIn[n++], objId);
146  }
147  else
148  {
149  size_t nmax=valuesIn.size();
150  for (size_t n=0; n<nmax; ++n)
151  setValue(valuesIn[n],addObject());
152  }
153 }
154 
155 bool MetaDataDb::bindValue(size_t id) const
156 {
157  bool success=true;
158 
159  // Prepare statement.
160  if (!myMDSql->bindStatement( id))
161  {
162  success = false;
163  }
164 
165  return success;
166 }
167 
168 bool MetaDataDb::initGetRow(bool addWhereClause) const
169 {
170  bool success=true;
171 
172  // Prepare statement.
173  if (!myMDSql->initializeSelect( addWhereClause, this->_activeLabels))
174  {
175  success = false;
176  }
177 
178  return success;
179 }
180 
182 {
183  std::vector<MDObject> mdValues;
184  mdValues.reserve(this->_activeLabels.size());
185 
186  row.clear();
187 
188  bool success = myMDSql->getObjectsValues(this->_activeLabels, mdValues);
189  if (success)
190  for (const auto &obj : mdValues)
191  row.setValue(obj);
192 
193  return success;
194 }
195 
197 {
198  myMDSql->finalizePreparedStmt();
199 }
200 
201 std::vector<MDObject> MetaDataDb::getObjectsForActiveLabels() const {
202  // get active labels
203  std::vector<MDObject> values;
204  const auto &labels = this->_activeLabels;
205  values.reserve(labels.size());
206  for (auto &l : labels) {
207  values.emplace_back(l);
208  }
209  return values;
210 }
211 
212 bool MetaDataDb::getAllRows(std::vector<MDRowSql> &rows) const
213 {
214  std::vector<std::vector<MDObject>> rawRows;
215  rawRows.reserve(this->size());
216  auto columns = getObjectsForActiveLabels();
217  if ( ! sqlUtils::select(myMDSql->db,
218  myMDSql->tableName(myMDSql->tableId),
219  columns,
220  rawRows)) return false;
221 
222  rows.clear();
223  const auto noOfRows = rawRows.size();
224  rows.resize(noOfRows);
225  for (size_t i = 0; i < noOfRows; ++i) {
226  auto &row = rows.at(i);
227  const auto &vals = rawRows.at(i);
228  // fill the row
229  for (auto &v : vals) {
230  row.setValue(v);
231  }
232  }
233  return true;
234 }
235 
236 std::unique_ptr<MDRow> MetaDataDb::getRow(size_t id) {
237  std::unique_ptr<MDRowSql> row(new MDRowSql());
238  if (!getRow(*row, id))
239  return nullptr;
240  return std::move(row);
241 }
242 
243 std::unique_ptr<const MDRow> MetaDataDb::getRow(size_t id) const {
244  std::unique_ptr<MDRowSql> row(new MDRowSql());
245  if (!getRow(*row, id))
246  return nullptr;
247  return std::move(row);
248 }
249 
251  MDRowSql row;
252  if (!getRow(row, id))
253  throw ObjectDoesNotExist(id, getFilename());
254  return row;
255 }
256 
257 const MDRowSql MetaDataDb::getRowSql(size_t id) const {
258  MDRowSql row;
259  if (!getRow(row, id))
260  throw ObjectDoesNotExist(id, getFilename());
261  return row;
262 }
263 
264 bool MetaDataDb::getRow(MDRowSql &row, size_t id) const
265 {
266  if (id == BAD_OBJID)
267  REPORT_ERROR(ERR_MD_NOACTIVE, "getValue: please provide objId other than -1");
268  // clear whatever is there now
269  row.clear();
270  // get active labels
271  auto values = getObjectsForActiveLabels();
272  // get values from the row
273  if ( ! sqlUtils::select(id,
274  myMDSql->db,
275  myMDSql->tableName(myMDSql->tableId),
276  values)) return false;
277  // fill them
278  for (auto &v : values)
279  row.setValue(v);
280  return true;
281 }
282 
283 bool MetaDataDb::getRow2(MDRow &row, size_t id) const
284 {
285  bool success=true;
286 
287  // Clear row.
288  row.clear();
289 
290  // Initialize SELECT.
291  success = this->initGetRow( true);
292  if (success)
293  {
294  bindValue( id);
295 
296  // Execute SELECT.
297  success = execGetRow( row);
298 
299  // Finalize SELECT.
300  finalizeGetRow();
301  }
302 
303  return(success);
304 }
305 
306 bool MetaDataDb::setRow(const MDRow &row, size_t id)
307 {
308  if (row.empty()) {
309  return true;
310  }
311  addMissingLabels(row);
312 
313  // create mask of valid labels
314  std::vector<MDLabel> labels;
315  labels.reserve(row.size());
316  for (const MDObject* obj : row) {
317  labels.emplace_back(obj->label);
318  }
319  // extract values to be added
320  std::vector<const MDObject*> vals;
321  vals.reserve(row.size());
322  for (const auto &l : labels) {
323  vals.emplace_back(row.getObject(l));
324  }
325  // update values to db
326  return sqlUtils::update(vals, MDSql::db,
327  myMDSql->tableName(myMDSql->tableId), id);
328 }
329 
330 
332 {
333  int j=0; // Loop counter.
334  bool success=true; // Return value.
335  std::vector<MDLabel> labels; // Columns labels.
336  std::vector<MDObject*> mdValues; // Vector to store values.
337 
338  // Set vector size.
339  labels.resize(row.size());
340 
341  // Get labels.
342  j=0;
343  for (const MDObject* obj : row) {
344  addLabel(obj->label);
345  labels[j] = obj->label;
346  j++;
347  }
348  labels.resize(j);
349 
350  // Prepare statement (mdValues is not used).
351  if (!myMDSql->initializeInsert( &labels, mdValues))
352  {
353  std::cerr << "initAddRow: error executing myMDSql->initializeInsert" << std::endl;
354  success = false;
355  }
356 
357  return success;
358 }
359 
360 
362 {
363  int j = 0;
364  bool success = true;
365  std::vector<const MDObject*> mdValues;
366 
367  // Set values vector size.
368  mdValues.resize(row.size());
369 
370  // Get values to insert.
371  j = 0;
372  for (const MDObject* obj : row) {
373  addLabel(obj->label);
374  mdValues[j] = row.getObject(obj->label);
375  j++;
376  }
377  mdValues.resize(j);
378 
379  // Execute statement.
380  if (!myMDSql->setObjectValues( -1, mdValues))
381  {
382  std::cerr << "execAddRow: error executing myMDSql->setObjectValues" << std::endl;
383  success = false;
384  }
385 
386  return(success);
387 }
388 
390 {
391  myMDSql->finalizePreparedStmt();
392 }
393 
394 size_t MetaDataDb::addRow(const MDRow &row)
395 {
396  size_t id = addObject();
397  for (auto obj : row)
398  if (obj->label != MDL_FIRST_LABEL)
399  setValue(*obj, id);
400 
401  return id;
402 }
403 
404 bool MetaDataDb::getRowValues(size_t id, std::vector<MDObject> &values) const {
405  for (auto &v : values) {
406  if (!containsLabel(v.label))
407  return false;
408  }
409  if (id == BAD_OBJID)
410  REPORT_ERROR(ERR_MD_NOACTIVE, "getValue: please provide objId other than -1");
411  return sqlUtils::select(id,
412  myMDSql->db,
413  myMDSql->tableName(myMDSql->tableId),
414  values);
415 }
416 
418 {
419  addRows({row});
420 }
421 
423  // find missing labels
424  std::vector<MDLabel> missingLabels;
425  auto definedLabels = row.labels();
426  for (const auto &l : definedLabels){
427  if ( ! containsLabel(l)) {
428  missingLabels.emplace_back(l);
429  }
430  }
431  // add missing labels
432  if ( ! missingLabels.empty()) {
433  sqlUtils::addColumns(missingLabels,
434  myMDSql->db,
435  myMDSql->tableName(myMDSql->tableId));
436  this->_activeLabels.insert(this->_activeLabels.end(), missingLabels.begin(), missingLabels.end());
437  }
438 }
439 
440 void MetaDataDb::addRows(const std::vector<MDRowSql> &rows)
441 {
442  const auto noOfRows = rows.size();
443  if (0 == noOfRows) {
444  return;
445  }
446  const auto &firstRow = rows.at(0);
447 
448  // assuming all rows are using the same labels
450 
451  // create mask of valid labels
452  std::vector<MDLabel> labels;
453  labels.reserve(firstRow.size());
454  for (const MDObject* obj : firstRow)
455  labels.emplace_back(obj->label);
456  const auto noOfLabels = labels.size();
457 
458  // extract values to be added
459  std::vector<std::vector<const MDObject*>> records;
460  records.reserve(noOfRows);
461  for (const auto &r : rows) {
462  records.emplace_back(std::vector<const MDObject*>());
463  auto &vals = records.back();
464  vals.reserve(noOfLabels);
465  for (const auto &l : labels) {
466  vals.emplace_back(r.getObject(l));
467  }
468  }
469  // insert values to db
470  sqlUtils::insert(records, myMDSql->db,
471  myMDSql->tableName(myMDSql->tableId));
472 }
473 
474 
475 size_t MetaDataDb::addRow2(const MDRow &row)
476 {
477  size_t id = BAD_OBJID;
478 
479  // Initialize INSERT.
480  if (initAddRow( row))
481  {
482  // Execute INSERT.
483  if (execAddRow( row))
484  {
485  // Get last inserted row id.
486  id = myMDSql->getObjId();
487  }
488 
489  // Finalize INSERT.
490  finalizeAddRow();
491  }
492 
493  return(id);
494 }
495 
497 {
498  myMDSql = new MDSql(this);
499  init({});
500 }//close MetaData default Constructor
501 
503  myMDSql = new MDSql(this);
504  init({});
506 }
507 
508 MetaDataDb::MetaDataDb(const std::vector<MDLabel> &labelsVector)
509 {
510  myMDSql = new MDSql(this);
511  init(labelsVector);
512 }//close MetaData default Constructor
513 
514 MetaDataDb::MetaDataDb(const FileName &fileName, const std::vector<MDLabel> &desiredLabels)
515 {
516  myMDSql = new MDSql(this);
517  init(desiredLabels);
518  read(fileName, desiredLabels.empty() ? nullptr : &desiredLabels);
519 }//close MetaData from file Constructor
520 
522 {
523  myMDSql = new MDSql(this);
524  copyMetadata(md);
525 }//close MetaData copy Constructor
526 
528 {
529  copyMetadata(md);
530  return *this;
531 }
532 
534 {
535  _clear();
536  delete myMDSql;
537 }//close MetaData Destructor
538 
539 //-------- Getters and Setters ----------
540 
541 int MetaDataDb::getMaxStringLength(const MDLabel thisLabel) const
542 {
543  if (!containsLabel(thisLabel))
544  return -1;
545 
546  return myMDSql->columnMaxLength(thisLabel);
547 }
548 
549 size_t MetaDataDb::size() const
550 {
551  return myMDSql->size();
552 }
553 
554 bool MetaDataDb::addLabel(const MDLabel label, int pos)
555 {
556  if (containsLabel(label))
557  return false;
558  if (pos < 0 || pos >= (int)this->_activeLabels.size())
559  this->_activeLabels.emplace_back(label);
560  else
561  this->_activeLabels.insert(this->_activeLabels.begin() + pos, label);
562  myMDSql->addColumn(label);
563  return true;
564 }
565 
567 {
568  std::vector<MDLabel>::iterator location;
569  location = std::find(this->_activeLabels.begin(), this->_activeLabels.end(), label);
570 
571  if (location == this->_activeLabels.end())
572  return false;
573 
574  this->_activeLabels.erase(location);
575  return true;
576 }
577 
579 {
580  return (size_t)myMDSql->addRow();
581 }
582 
583 void MetaDataDb::importObject(const MetaData &md, const size_t id, bool doClear)
584 {
585  // Currently supports importing only from MetaDataDb
586  assert(dynamic_cast<const MetaDataDb*>(&md) != nullptr);
587 
588  const MetaDataDb& mdd = dynamic_cast<const MetaDataDb&>(md);
589  MDValueEQ query(MDL_OBJID, id);
590  mdd.myMDSql->copyObjects(this, &query);
591 }
592 
593 void MetaDataDb::importObjects(const MetaData &md, const std::vector<size_t> &objectsToAdd, bool doClear)
594 {
595  const std::vector<MDLabel>& labels = md.getActiveLabels();
596  init(labels);
597  copyInfo(md);
598  int size = objectsToAdd.size();
599  for (int i = 0; i < size; i++)
600  importObject(md, objectsToAdd[i]);
601 }
602 
603 void MetaDataDb::importObjects(const MetaData &md, const MDQuery &query, bool doClear)
604 {
605  // Currently supports importing only from MetaDataDb
606  assert(dynamic_cast<const MetaDataDb*>(&md) != nullptr);
607 
608  const MetaDataDb& mdd = dynamic_cast<const MetaDataDb&>(md);
609  this->_importObjectsDb(mdd, query, doClear);
610 }
611 
612 void MetaDataDb::_importObjectsDb(const MetaDataDb &md, const MDQuery &query, bool doClear)
613 {
614  if (doClear)
615  {
616  //Copy all structure and info from the other metadata
617  init(md._activeLabels);
618  copyInfo(md);
619  }
620  else
621  {
622  //If not clear, ensure that the have the same labels
623  for (size_t i = 0; i < md._activeLabels.size(); i++)
624  addLabel(md._activeLabels[i]);
625  }
626  md.myMDSql->copyObjects(this, &query);
627 }
628 
629 bool MetaDataDb::removeObject(size_t id) {
630  int removed = removeObjects(MDValueEQ(MDL_OBJID, id));
631  return (removed > 0);
632 }
633 
634 void MetaDataDb::removeObjects(const std::vector<size_t> &toRemove)
635 {
636  int size = toRemove.size();
637  for (int i = 0; i < size; i++)
638  removeObject(toRemove[i]);
639 }
640 
642 {
643  int removed = myMDSql->deleteObjects(&query);
644  return removed;
645 }
646 
648 {
649  int removed = myMDSql->deleteObjects();
650  return removed;
651 }
652 
653 void MetaDataDb::addIndex(MDLabel label) const
654 {
655  std::vector<MDLabel> labels(1);
656  labels[0]=label;
657  addIndex(labels);
658 }
659 void MetaDataDb::addIndex(const std::vector<MDLabel> &desiredLabels) const
660 {
661 
662  myMDSql->indexModify(desiredLabels, true);
663 }
664 
666 {
667  std::vector<MDLabel> labels(1);
668  labels[0]=label;
669  removeIndex(labels);
670 }
671 
672 void MetaDataDb::removeIndex(const std::vector<MDLabel> &desiredLabels)
673 {
674  myMDSql->indexModify(desiredLabels, false);
675 }
676 
678 {
680  fillLinear(MDL_ITEM_ID,1,1);
681 }
682 
684 {
686 }
687 
688 //----------Iteration functions -------------------
689 
691 {
692  return myMDSql->firstRow();
693 }
694 
695 size_t MetaDataDb::firstObject(const MDQuery & query) const
696 {
697  std::vector<size_t> ids;
698  findObjects(ids, query);
699  size_t id = ids.size() == 1 ? ids[0] : BAD_OBJID;
700  return id;
701 }
702 
703 size_t MetaDataDb::lastRowId() const
704 {
705  return myMDSql->lastRow();
706 }
707 
708 //-------------Search functions-------------------
709 void MetaDataDb::findObjects(std::vector<size_t> &objectsOut, const MDQuery &query) const
710 {
711  objectsOut.clear();
712  myMDSql->selectObjects(objectsOut, &query);
713 }
714 
715 void MetaDataDb::findObjects(std::vector<size_t> &objectsOut, int limit) const
716 {
717  objectsOut.clear();
718  MDQuery query(limit);
719  myMDSql->selectObjects(objectsOut, &query);
720 }
721 
722 size_t MetaDataDb::countObjects(const MDQuery &query) const
723 {
724  std::vector<size_t> objects;
725  findObjects(objects, query);
726  return objects.size();
727 }
728 
729 bool MetaDataDb::containsObject(size_t objectId) const
730 {
731  return containsObject(MDValueEQ(MDL_OBJID, objectId));
732 }
733 
734 bool MetaDataDb::containsObject(const MDQuery &query) const
735 {
736  std::vector<size_t> objects;
737  findObjects(objects, query);
738  return objects.size() > 0;
739 }
740 
741 //--------------IO functions -----------------------
742 #include <sys/stat.h>
743 #include <fcntl.h>
744 #ifdef XMIPP_MMAP
745 #include <sys/mman.h>
746 #endif
747 
748 void MetaDataDb::write(const FileName &_outFile, WriteModeMetaData mode) const
749 {
750  String blockName;
751  FileName outFile;
752  FileName extFile;
753 
754  blockName=_outFile.getBlockName();
755  if (blockName.empty())
756  blockName = DEFAULT_BLOCK_NAME;
757  outFile = _outFile.removeBlockName();
758  extFile = _outFile.getExtension();
759 
760  if (extFile=="xml")
761  {
762  writeXML(outFile, blockName, mode);
763  }
764  else if(extFile=="sqlite")
765  {
766  writeDB(outFile, blockName, mode);
767  }
768  else
769  {
770  writeStar(outFile, blockName, mode);
771  }
772 }
773 
774 void MetaDataDb::_writeRows(std::ostream &os) const
775 {
776 
777  auto sortedLabels = this->_activeLabels;
778  std::sort(sortedLabels.begin(), sortedLabels.end());
779  for (const auto& row : *this)
780  {
781  for (size_t i = 0; i < sortedLabels.size(); i++)
782  {
783  if (sortedLabels[i] != MDL_STAR_COMMENT)
784  {
785  os.width(1);
786  row.getObject(sortedLabels[i])->toStream(os, true);
787  os << " ";
788  }
789  }
790 
791  os << '\n';
792  }
793 }
794 
795 void MetaDataDb::write(std::ostream &os,const String &blockName, WriteModeMetaData mode ) const
796 {
797  if(mode==MD_OVERWRITE)
798  os << FileNameVersion << " * "// << (isColumnFormat ? "column" : "row")
799  << '\n' //write which type of format (column or row) and the path;
800  << WordWrap(this->_comment, line_max); //write md comment in the 2nd comment line of header
801  //write data block
802  String _szBlockName("data_");
803  _szBlockName += blockName;
804 
805  if (this->isColumnFormat())
806  {
807  //write md columns in 3rd comment line of the header
808  os << _szBlockName << '\n';
809  os << "loop_" << '\n';
810  auto sortedLabels = this->_activeLabels;
811  std::sort(sortedLabels.begin(), sortedLabels.end());
812  for (size_t i = 0; i < sortedLabels.size(); i++)
813  {
814  const auto &label = sortedLabels.at(i);
815  if (label != MDL_STAR_COMMENT)
816  {
817  os << " _" << MDL::label2Str(label) << '\n';
818  }
819  }
820  _writeRows(os);
821 
822  //Put the activeObject to the first, if exists
823  }
824  else //rowFormat
825  {
826  os << _szBlockName << '\n';
827 
828  // Get first object. In this case (row format) there is a single object
829  size_t id = firstRowId();
830 
831  if (id != BAD_OBJID)
832  {
833  auto sortedLabels = this->_activeLabels;
834  std::sort(sortedLabels.begin(), sortedLabels.end());
835  for (size_t i = 0; i < sortedLabels.size(); i++)
836  {
837  const auto &label = sortedLabels.at(i);
838  if (label != MDL_STAR_COMMENT)
839  {
840  MDObject mdValue(label);
841  os << " _" << MDL::label2Str(label) << " ";
842  myMDSql->getObjectValue(id, mdValue);
843  mdValue.toStream(os);
844  os << '\n';
845  }
846  }
847  }
848 
849  }
850 }//write
851 
852 
853 void MetaDataDb::_parseObjects(std::istream &is, std::vector<MDObject*> &columnValues, const std::vector<MDLabel> *desiredLabels, bool firstTime)
854 {
855  size_t i=0; // Loop counter.
856  size_t size=0; // Column values vector size.
857 
858  // Columns loop.
859  size = columnValues.size();
860  for (i=0; i<size ;i++)
861  {
862  columnValues[i]->fromStream(is);
863  if (is.fail())
864  {
865  String errorMsg = formatString("MetaData: Error parsing column '%s' value.", MDL::label2Str(columnValues[i]->label).c_str());
866  columnValues[i]->failed = true;
867  std::cerr << "WARNING: " << errorMsg << std::endl;
868  //REPORT_ERROR(ERR_MD_BADLABEL, (String)"read: Error parsing data column, expecting " + MDL::label2Str(object.label));
869  }
870  else
871  {
872  if (firstTime)
873  {
874  // Check if current column label exists.
875  if (columnValues[i]->label != MDL_UNDEFINED)
876  {
877  // If there are no desired labels then add all.
878  bool reallyAdd=false;
879  if (desiredLabels==NULL)
880  {
881  reallyAdd=true;
882  }
883  else
884  {
885  // Check if current column belongs to desired labels.
886  for (size_t j=0; j<desiredLabels->size(); ++j)
887  {
888  if ((*desiredLabels)[j]==columnValues[i]->label)
889  {
890  reallyAdd=true;
891  break;
892  }
893  }
894  }
895 
896  // Add label if not exists.
897  if (reallyAdd)
898  {
899  addLabel(columnValues[i]->label);
900  }
901  }
902  }
903  }
904  }
905 
906  // Insert elements in DB.
907  myMDSql->setObjectValues( -1, columnValues, desiredLabels);
908 }
909 
910 
911 
912 void MetaDataDb::read(const FileName &_filename,
913  const std::vector<MDLabel> *desiredLabels,
914  bool decomposeStack)
915 {
916  String blockName;
917  FileName inFile;
918 
919  blockName=_filename.getBlockName();
920  // if (blockName.empty())
921  // blockName = DEFAULT_BLOCK_NAME;
922  inFile = _filename.removeBlockName();
923  String extFile = _filename.getExtension();
924  blockName=escapeForRegularExpressions(blockName);
925 
926  _clear();
927  myMDSql->createMd();
928  this->setColumnFormat(true);
929 
930  if (extFile=="xml")
931  readXML(inFile, desiredLabels, blockName, decomposeStack);
932  else if(extFile=="sqlite")
933  readDB(inFile, desiredLabels, blockName, decomposeStack);
934  else
935  readStar(_filename, desiredLabels, blockName, decomposeStack);
936 }
937 
938 
939 void MetaDataDb::readPlain(const FileName &inFile, const String &labelsString, const String &separator)
940 {
941  constexpr size_t LINE_LENGTH = 1024;
942  clear();
943  std::vector<MDLabel> labels;
944  MDL::str2LabelVector(labelsString, labels);
945 
946  char lineBuffer[LINE_LENGTH];
947  String line;
948  std::ifstream is(inFile.c_str(), std::ios_base::in);
949  size_t lineCounter = 0;
950  size_t columnsNumber = labels.size();
951  size_t objId;
952  StringVector parts;
953 
954  while (is.getline(lineBuffer, LINE_LENGTH))
955  {
956  ++lineCounter;
957  line.assign(lineBuffer);
958  trim(line);
959  if (line[0]=='#') // This is an old Xmipp comment
960  continue;
961  if (!line.empty())
962  {
963  std::stringstream ss(line);
964  objId = addObject();
965  for (size_t i = 0; i < columnsNumber; ++i)
966  {
967  MDObject obj(labels[i]);
968  _parseObject(ss, obj, objId);
969  setValue(obj, objId);
970  }
971  }
972  }
973 }
974 
975 void MetaDataDb::addPlain(const FileName &inFile, const String &labelsString, const String &separator)
976 {
977  MetaDataDb md2;
978  md2.readPlain(inFile, labelsString);
979  merge(md2);
980 }
981 
983 {
984 #ifdef XMIPP_MMAP
985  String blockName;
986  FileName outFile;
987 
988  blockName=_inFile.getBlockName();
989  outFile = _inFile.removeBlockName();
990 
991  struct stat file_status;
992  int fd;
993  char *map;
994 
995  //check if file exists or not block name has been given
996  //in our format no two identical data_xxx strings may exists
997 
998  if (blockName.empty() || !outFile.exists())
999  return false;
1000  else
1001  {
1002  //does blockname exists?
1003  //remove it from file in this case
1004  // get length of file:
1005  if(stat(outFile.c_str(), &file_status) != 0)
1006  REPORT_ERROR(ERR_IO_NOPATH,"Metadata:existsBlock can not get filesize for file "+outFile);
1007  size_t size = file_status.st_size;
1008  if(size!=0)//size=0 for /dev/stderr
1009  {
1010  fd = open(outFile.c_str(), O_RDWR, S_IREAD | S_IWRITE);
1011  if (fd == -1)
1012  REPORT_ERROR(ERR_IO_NOPATH,"Metadata:existsBlock can not read file named "+outFile);
1013 
1014  map = (char *) mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
1015  if (map == MAP_FAILED)
1016  REPORT_ERROR(ERR_MEM_BADREQUEST,"Metadata:existsBlock can not map memory ");
1017 
1018  // Is this a START formatted FILE
1019  String _szBlockName = (String)("\ndata_") + blockName;
1020  size_t blockNameSize = _szBlockName.size();
1021  close(fd);
1022  bool found=_memmem(map, size, _szBlockName.data(), blockNameSize) != NULL;
1023  if (munmap(map, size) == -1)
1024  REPORT_ERROR(ERR_MEM_NOTDEALLOC,"metadata:write, Can not unmap memory");
1025  return found;
1026  }
1027  return false;
1028  }
1029 #else
1030  REPORT_ERROR(ERR_MMAP,"Mapping not supported in Windows");
1031 #endif
1032 }
1033 void MetaDataDb::readXML(const FileName &filename,
1034  const std::vector<MDLabel> *desiredLabels,
1035  const String & blockRegExp,
1036  bool decomposeStack)
1037 {
1038  REPORT_ERROR(ERR_NOT_IMPLEMENTED,"readXML not implemented yet");
1039 }
1040 
1041 void MetaDataDb::readDB(const FileName &filename,
1042  const std::vector<MDLabel> *desiredLabels,
1043  const String & blockRegExp,
1044  bool decomposeStack)//what is decompose stack for?
1045 {
1046  myMDSql->copyTableFromFileDB(blockRegExp, filename, desiredLabels, _maxRows);
1047 }
1048 
1049 /* This function parses rows data in START format
1050  */
1051 void MetaDataDb::_readRowsStar(mdBlock &block, std::vector<MDObject*> & columnValues,
1052  const std::vector<MDLabel> *desiredLabels) {
1053  String line;
1054  std::stringstream ss;
1055  size_t n = block.end - block.loop;
1056  bool firstTime = true;
1057 
1058  if (n == 0)
1059  return;
1060 
1061  char * buffer = new char[n];
1062  memcpy(buffer, block.loop, n);
1063  char *iter = buffer, *end = iter + n, * newline = NULL;
1064  _parsedLines = 0; //Check how many lines the md have
1065 
1066  if (myMDSql->initializeInsert( desiredLabels, columnValues))
1067  {
1068  while (iter < end) { //while there are data lines
1069  //Adding \n position and check if NULL at the same time
1070  if (!(newline = END_OF_LINE()))
1071  newline = end;
1072  line.assign(iter, newline - iter);
1073  trim(line);
1074 
1075  if (!line.empty() && line[0] != '#') {
1076  //_maxRows would be > 0 if we only want to read some
1077  // rows from the md for performance reasons...
1078  // anyway the number of lines will be counted in _parsedLines
1079  if (_maxRows == 0 || _parsedLines < _maxRows) {
1080  std::stringstream ss(line);
1081  this->_parseObjects(ss, columnValues, desiredLabels, firstTime);
1082  firstTime = false;
1083  }
1084  _parsedLines++;
1085  }
1086  iter = newline + 1; //go to next line
1087  }
1088 
1089  myMDSql->finalizePreparedStmt();
1090  }
1091 
1092  delete[] buffer;
1093 }
1094 
1095 /*This function will read the md data if is in row format */
1097  String line, token;
1098  MDLabel label;
1099 
1100  size_t objectID = addObject();
1101 
1102  // Read data and fill structures accordingly
1103  while (getline(is, line, '\n'))
1104  {
1105  if (line[0] == '#' || line[0] == '\0' || line[0] == ';')
1106  continue;
1107 
1108  // Parse labels
1109  std::stringstream os(line);
1110 
1111  os >> token;
1112  label = MDL::str2Label(token);
1113  MDObject value(label);
1114  os >> value;
1115  if (label != MDL_UNDEFINED)
1116  setValue(value, objectID);
1117  }
1118 }
1119 
1120 void MetaDataDb::merge(const MetaData &md2)
1121 {
1122  if (size() != md2.size())
1123  REPORT_ERROR(ERR_MD, "Size of two metadatas should coincide for merging.");
1124 
1125  for (const auto& row : md2)
1126  this->setRow(row, row.id());
1127 }
1128 
1129 #define SET_AND_FILL() generator.label=label; generator.fill(*this)
1130 
1132 {
1133  //aggregate metadata by label (that is, avoid repetitions
1134  MetaDataDb mdCTFs;
1135  mdCTFs.distinct(*this,label);
1136  //read file-metadatas in new metadata
1137  MetaDataDb ctfModel;
1138  FileName fn;
1139  MDRowSql row;
1140 
1141  for (size_t id : mdCTFs.ids())
1142  {
1143  if (mdCTFs.getValue(label, fn, id))
1144  {
1145  ctfModel.read(fn);
1146  if (ctfModel.isEmpty())
1147  REPORT_ERROR(ERR_VALUE_INCORRECT, "Only can expand non empty metadatas");
1148  ctfModel.getRow(row, ctfModel.firstRowId());
1149  mdCTFs.setRow(row, id);
1150  }
1151  }
1152  //join
1153  MetaDataDb md(*this);
1154  join1(md, mdCTFs, label);
1155 }
1156 
1157 void MetaDataDb::fillConstant(MDLabel label, const String &value)
1158 {
1159  MDConstGenerator generator(value);
1160  SET_AND_FILL();
1161 }
1162 
1163 void MetaDataDb::fillRandom(MDLabel label, const String &mode, double op1, double op2, double op3)
1164 {
1165  MDRandGenerator generator(op1, op2, mode, op3);
1166  SET_AND_FILL();
1167 }
1168 
1169 void MetaDataDb::fillLinear(MDLabel label, double initial, double step)
1170 {
1171  MDLinealGenerator generator(initial, step);
1172  SET_AND_FILL();
1173 }
1174 
1175 void MetaDataDb::copyColumn(MDLabel labelDest, MDLabel labelSrc)
1176 {
1177  String srcName = MDL::label2Str(labelSrc);
1178  if (!containsLabel(labelSrc))
1179  REPORT_ERROR(ERR_ARG_MISSING, formatString("Source label: '%s' doesn't exist on metadata", srcName.c_str()));
1180  addLabel(labelDest);
1181 
1182  String destName = MDL::label2Str(labelDest);
1183  String cmd = formatString("%s=%s", destName.c_str(), srcName.c_str());
1184  operate(cmd);
1185 }
1186 
1187 void MetaDataDb::copyColumnTo(MetaData &md, MDLabel labelDest, MDLabel labelSrc)
1188 {
1189  if (!containsLabel(labelSrc))
1190  REPORT_ERROR(ERR_ARG_MISSING, formatString("Source label: '%s' doesn't exist on metadata",
1191  (MDL::label2Str(labelSrc)).c_str()));
1192  md.addLabel(labelDest);
1193  std::vector<MDObject> values;
1194  getColumnValues(labelSrc, values);
1195  md.setColumnValues(values);
1196 }
1197 
1198 void MetaDataDb::renameColumn(MDLabel oldLabel, MDLabel newLabel)
1199 {
1200  if (!containsLabel(oldLabel))
1201  REPORT_ERROR(ERR_ARG_MISSING, formatString("Source label: '%s' doesn't exist on metadata",
1202  (MDL::label2Str(oldLabel)).c_str()));
1203  std::vector<MDLabel> vOldLabel(1);
1204  vOldLabel[0]=oldLabel;
1205  std::vector<MDLabel> vNewLabel(1);
1206  vNewLabel[0]=newLabel;
1207  renameColumn(vOldLabel,vNewLabel);
1208 }
1209 
1210 void MetaDataDb::renameColumn(const std::vector<MDLabel> &vOldLabel,
1211  const std::vector<MDLabel> &vNewLabel)
1212 {
1213  myMDSql->renameColumn(vOldLabel,vNewLabel);
1214 }
1215 
1217  MDLabel aggregateLabel)
1218 
1219 {
1220  mdValueOut.setValue(myMDSql->aggregateSingleDouble(op,aggregateLabel));
1221 }
1222 
1224  MDLabel aggregateLabel)
1225 
1226 {
1227  mdValueOut.setValue(myMDSql->aggregateSingleSizeT(op,aggregateLabel));
1228 }
1229 
1230 
1232 {
1233  MDObject result(column);
1234  aggregateSingle(result, AGGR_MAX, column);
1235  return result.getValue2(double());
1236 }
1237 
1239 {
1240  MDObject result(column);
1241  aggregateSingle(result, AGGR_MIN, column);
1242  return result.getValue2(double());
1243 }
1244 
1245 
1247  MDLabel aggregateLabel)
1248 
1249 {
1250  size_t aux = myMDSql->aggregateSingleSizeT(op,aggregateLabel);
1251  int aux2 = (int) aux;
1252  mdValueOut.setValue(aux2);
1253 }
1254 
1256  MDLabel aggregateLabel, MDLabel operateLabel, MDLabel resultLabel)
1257 {
1258  std::vector<MDLabel> labels(2);
1259  std::vector<MDLabel> operateLabels(1);
1260  labels[0] = aggregateLabel;
1261  labels[1] = resultLabel;
1262  operateLabels[0]=operateLabel;
1263  init(labels);
1264  std::vector<AggregateOperation> ops(1);
1265  ops[0] = op;
1266  mdIn.myMDSql->aggregateMd(this, ops, operateLabels);
1267 }
1268 
1269 void MetaDataDb::aggregate(const MetaDataDb &mdIn, const std::vector<AggregateOperation> &ops,
1270  const std::vector<MDLabel> &operateLabels,
1271  const std::vector<MDLabel> &resultLabels)
1272 {
1273  if (resultLabels.size() - ops.size() != 1)
1274  REPORT_ERROR(ERR_MD, "Labels vectors should contain one element more than operations");
1275  init(resultLabels);
1276  mdIn.myMDSql->aggregateMd(this, ops, operateLabels);
1277 }
1278 
1280  AggregateOperation op,
1281  const std::vector<MDLabel> &groupByLabels,
1282  MDLabel operateLabel,
1283  MDLabel resultLabel)
1284 {
1285  std::vector<MDLabel> labels;
1286  labels = groupByLabels;
1287  labels.emplace_back(resultLabel);
1288  init(labels);
1289  mdIn.myMDSql->aggregateMdGroupBy(this, op, groupByLabels, operateLabel, resultLabel);
1290 }
1291 
1292 //-------------Set Operations ----------------------
1294  const MDLabel label,
1295  SetOperation operation)
1296 {
1297  std::vector<MDLabel> labels;
1298  labels.emplace_back(label);
1299  _setOperates(mdIn,labels,operation);
1300 }
1301 
1303  const std::vector<MDLabel> &labels,
1304  SetOperation operation)
1305 {
1306  if (this == &mdIn) //not sense to operate on same metadata
1307  REPORT_ERROR(ERR_MD, "Couldn't perform this operation on input metadata");
1308  if (size() == 0 && mdIn.size() == 0)
1309  REPORT_ERROR(ERR_MD, "Couldn't perform this operation if both metadata are empty");
1310  //Add labels to be sure are present
1311  for (size_t i = 0; i < mdIn._activeLabels.size(); i++)
1312  addLabel(mdIn._activeLabels[i]);
1313 
1314  mdIn.myMDSql->setOperate(this, labels, operation);
1315 }
1316 
1318  const MDLabel label,
1319  SetOperation operation)
1320 {
1321  if (this == &mdIn) //not sense to operate on same metadata
1322  REPORT_ERROR(ERR_MD, "Couldn't perform this operation on input metadata");
1323  if (mdIn.size() == 0)
1324  REPORT_ERROR(ERR_MD, "Couldn't perform this operation if both metadata are empty");
1325  //Add label to be sure is present in output
1326  addLabel(label);
1327  std::vector<MDLabel> labels;
1328  labels.emplace_back(label);
1329  mdIn.myMDSql->setOperate(this, labels, operation);
1330 }
1331 
1333  const MetaDataDb &mdInRight,
1334  const std::vector<MDLabel> &labelsLeft,
1335  const std::vector<MDLabel> &labelsRight,
1336  SetOperation operation)
1337 {
1338  if (this == &mdInLeft || this == &mdInRight) //not sense to operate on same metadata
1339  REPORT_ERROR(ERR_MD, "Couldn't perform this operation on input metadata");
1340  //Add labels to be sure are present
1341  for (size_t i = 0; i < mdInLeft._activeLabels.size(); i++)
1342  addLabel(mdInLeft._activeLabels[i]);
1343  for (size_t i = 0; i < mdInRight._activeLabels.size(); i++)
1344  {
1345  bool found=false;
1346  for (size_t j=0; j<labelsRight.size(); ++j)
1347  if (mdInRight._activeLabels[i]==labelsRight[j])
1348  {
1349  found=true;
1350  break;
1351  }
1352  if (!found)
1353  addLabel(mdInRight._activeLabels[i]);
1354  }
1355 
1356  myMDSql->setOperate(&mdInLeft, &mdInRight, labelsLeft,labelsRight, operation);
1357 }
1358 
1359 void MetaDataDb::unionDistinct(const MetaDataDb &mdIn, const MDLabel label)
1360 {
1361  if(mdIn.isEmpty())
1362  return;
1363  _setOperates(mdIn, label, UNION_DISTINCT);
1364 }
1365 
1367 {
1368  if(mdIn.isEmpty())
1369  return;
1370  _setOperates(mdIn, MDL_UNDEFINED, UNION);//label not needed for unionAll operation
1371 }
1372 
1373 
1374 void MetaDataDb::intersection(const MetaDataDb &mdIn, const MDLabel label)
1375 {
1376  if(mdIn.isEmpty())
1377  clear();
1378  else
1379  _setOperates(mdIn, label, INTERSECTION);
1380 }
1381 
1383 {
1384  if(MDin.isEmpty())
1385  return;
1386  _setOperates(MDin, label, REMOVE_DUPLICATE);
1387 }
1388 
1390 {
1391  if(MDin.isEmpty())
1392  return;
1393  _setOperatesLabel(MDin, label, DISTINCT);
1394 }
1395 
1396 void MetaDataDb::subtraction(const MetaDataDb &mdIn, const MDLabel label)
1397 {
1398  if(mdIn.isEmpty())
1399  return;
1400  _setOperates(mdIn, label, SUBSTRACTION);
1401 }
1402 
1403 void MetaDataDb::join1(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight, const MDLabel label, JoinType type)
1404 {
1405  join2(mdInLeft, mdInRight, label, label, type);
1406 }
1407 
1408 void MetaDataDb::join2(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight, const MDLabel labelLeft,
1409  const MDLabel labelRight, JoinType type)
1410 {
1411  clear();
1412  std::vector<MDLabel> labelsLeft, labelsRight;
1413  labelsLeft.emplace_back(labelLeft);
1414  labelsRight.emplace_back(labelRight);
1415  _setOperates(mdInLeft, mdInRight, labelsLeft,labelsRight, (SetOperation)type);
1416 }
1417 
1418 void MetaDataDb::join1(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight, const std::vector<MDLabel> &labels, JoinType type)
1419 {
1420  join2(mdInLeft, mdInRight, labels, labels, type);
1421 }
1422 
1423 void MetaDataDb::join2(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight, const std::vector<MDLabel> &labelsLeft,
1424  const std::vector<MDLabel> &labelsRight, JoinType type)
1425 {
1426  clear();
1427  _setOperates(mdInLeft, mdInRight, labelsLeft,labelsRight, (SetOperation)type);
1428 }
1429 
1430 void MetaDataDb::joinNatural(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight)
1431 {
1432  join2(mdInLeft, mdInRight, MDL_UNDEFINED, MDL_UNDEFINED, NATURAL);
1433 }
1434 
1435 void MetaDataDb::operate(const String &expression)
1436 {
1437  if (!myMDSql->operate(expression))
1438  REPORT_ERROR(ERR_MD, "MetaDataDb::operate: error doing operation");
1439 }
1440 
1441 void MetaDataDb::replace(const MDLabel label, const String &oldStr, const String &newStr)
1442 {
1443  String labelStr = MDL::label2Str(label);
1444  String expression = formatString("%s=replace(%s,'%s', '%s')",
1445  labelStr.c_str(), labelStr.c_str(), oldStr.c_str(), newStr.c_str());
1446  if (!myMDSql->operate(expression))
1447  REPORT_ERROR(ERR_MD, "MetaDataDb::replace: error doing operation");
1448 }
1449 
1451 {
1452  std::random_device rd;
1453  auto g = std::mt19937(rd());
1454  std::vector<size_t> objects;
1455  MDin.myMDSql->selectObjects(objects);
1456  std::shuffle(objects.begin(), objects.end(), g);
1457  importObjects(MDin, objects);
1458 }
1459 
1460 void MetaDataDb::sort(MetaDataDb &MDin, const MDLabel sortLabel,bool asc, int limit, int offset)
1461 {
1462  if (MDin.containsLabel(sortLabel))
1463  {
1464  init(MDin._activeLabels);
1465  copyInfo(MDin);
1466  //if you sort just once the index will not help much
1467  addIndex(sortLabel);
1468  MDQuery query(limit, offset, sortLabel,asc);
1469  MDin.myMDSql->copyObjects(this, &query);
1470  }
1471  else
1472  *this=MDin;
1473 }
1474 
1475 void MetaDataDb::sort(MetaDataDb &MDin, const String &sortLabel,bool asc, int limit, int offset)
1476 {
1477  // Check if the label has semicolon
1478  size_t ipos=sortLabel.find(':');
1479  MDLabelType type = MDL::labelType(sortLabel);
1480  if (ipos!=String::npos || type == LABEL_VECTOR_DOUBLE || type == LABEL_VECTOR_SIZET)
1481  {
1482  if(limit != -1 || offset != 0)
1483  REPORT_ERROR(ERR_ARG_INCORRECT,"Limit and Offset are not implemented for vector sorting.");
1484 
1485  MDLabel label;
1486  size_t column;
1487  if (ipos!=String::npos)
1488  {
1489  // Check that the label is a vector field
1490  std::vector< String > results;
1491  splitString(sortLabel,":",results);
1492  column=textToInteger(results[1]);
1493  MDLabelType type = MDL::labelType(results[0]);
1494  if (type != LABEL_VECTOR_DOUBLE || type != LABEL_VECTOR_SIZET)
1495  REPORT_ERROR(ERR_ARG_INCORRECT,"Column specifications cannot be used with non-vector labels");
1496  label = MDL::str2Label(results[0]);
1497  }
1498  else
1499  {
1500  label = MDL::str2Label(sortLabel);
1501  column = 0;
1502  }
1503 
1504  // Get the column values
1506  v.resizeNoCopy(MDin.size());
1507  std::vector<double> vectorValues;
1508  int i = 0;
1509  for (size_t id : MDin.ids())
1510  {
1511  MDin.getValue(label, vectorValues, id);
1512  if (column >= vectorValues.size())
1513  REPORT_ERROR(ERR_MULTIDIM_SIZE,"Trying to access to inexistent column in vector");
1514  DIRECT_A1D_ELEM(v, i) = vectorValues[column];
1515  i++;
1516  }
1517 
1518  // Sort
1519  MultidimArray<int> idx;
1520  v.indexSort(idx);
1521 
1522  // Construct output Metadata
1523  init(MDin._activeLabels);
1524  copyInfo(MDin);
1525  size_t id;
1527  {
1528  MDRowSql row;
1529  MDin.getRow(row,DIRECT_A1D_ELEM(idx,i));
1530  id = addObject();
1531  setRow(row, id);
1532  }
1533  }
1534  else
1535  {
1536  sort(MDin, MDL::str2Label(sortLabel),asc, limit, offset);
1537  }
1538 }
1539 
1540 void MetaDataDb::split(size_t n, std::vector<MetaDataDb> &results, const MDLabel sortLabel)
1541 {
1542  size_t mdSize = size();
1543  if (n > mdSize)
1544  REPORT_ERROR(ERR_MD, "MetaDataDb::split: Couldn't split a metadata in more parts than its size");
1545 
1546  results.clear();
1547  results.resize(n);
1548  for (size_t i = 0; i < n; i++)
1549  {
1550  MetaDataDb &md = results.at(i);
1551  md._selectSplitPart(*this, n, i, mdSize, sortLabel);
1552  }
1553 }
1554 
1556  int n, int part, size_t mdSize,
1557  const MDLabel sortLabel)
1558 {
1559  size_t first, last, n_images;
1560  n_images = divide_equally(mdSize, n, part, first, last);
1561  init(mdIn._activeLabels);
1562  copyInfo(mdIn);
1563  mdIn.myMDSql->copyObjects(this, new MDQuery(n_images, first, sortLabel));
1564 }
1565 
1566 void MetaDataDb::selectSplitPart(const MetaData &mdIn, size_t n, size_t part, const MDLabel sortLabel)
1567 {
1568  if (dynamic_cast<const MetaDataDb*>(&mdIn) != nullptr)
1569  return _selectSplitPart(dynamic_cast<const MetaDataDb&>(mdIn), n, part, sortLabel);
1570  throw std::logic_error("Not yet implemented"); // TODO: use universal functions just on MetaData
1571 }
1572 
1573 void MetaDataDb::_selectSplitPart(const MetaDataDb &mdIn, size_t n, size_t part, const MDLabel sortLabel)
1574 {
1575  size_t mdSize = mdIn.size();
1576  if (n > mdSize)
1577  REPORT_ERROR(ERR_MD, "selectSplitPart: Couldn't split a metadata in more parts than its size");
1578  if (part < 0 || part >= n)
1579  REPORT_ERROR(ERR_MD, "selectSplitPart: 'part' should be between 0 and n-1");
1580  _selectSplitPart(mdIn, n, part, mdSize, sortLabel);
1581 
1582 }
1583 
1584 void MetaDataDb::selectRandomSubset(const MetaData &mdIn, size_t numberOfObjects, const MDLabel sortLabel)
1585 {
1586  if (dynamic_cast<const MetaDataDb*>(&mdIn) != nullptr)
1587  return _selectRandomSubset(dynamic_cast<const MetaDataDb&>(mdIn), numberOfObjects, sortLabel);
1588  throw std::logic_error("Not yet implemented"); // TODO: use universal functions just on MetaData
1589 }
1590 
1591 void MetaDataDb::_selectRandomSubset(const MetaDataDb &mdIn, size_t numberOfObjects, const MDLabel sortLabel)
1592 {
1593  clear();
1594 
1595  MetaDataDb mdAux, mdAux2;
1596  mdAux.randomize(mdIn);
1597  mdAux2.selectPart(mdAux, 0, numberOfObjects);
1598  sort(mdAux2,sortLabel);
1599 }
1600 
1601 void MetaDataDb::selectPart(const MetaData &mdIn, size_t startPosition, size_t numberOfObjects,
1602  const MDLabel sortLabel)
1603 {
1604  if (dynamic_cast<const MetaDataDb*>(&mdIn) != nullptr)
1605  return _selectPart(dynamic_cast<const MetaDataDb&>(mdIn), startPosition, numberOfObjects, sortLabel);
1606  throw std::logic_error("Not yet implemented"); // TODO: use universal functions just on MetaData
1607 }
1608 
1609 void MetaDataDb::_selectPart(const MetaDataDb &mdIn, size_t startPosition, size_t numberOfObjects,
1610  const MDLabel sortLabel)
1611 {
1612  size_t mdSize = mdIn.size();
1613  if (startPosition < 0 || startPosition >= mdSize)
1614  REPORT_ERROR(ERR_MD, "selectPart: 'startPosition' should be between 0 and size()-1");
1615  init(mdIn._activeLabels);
1616  copyInfo(mdIn);
1617  mdIn.myMDSql->copyObjects(this, new MDQuery(numberOfObjects, startPosition, sortLabel));
1618 }
1619 
1621 {
1622 
1623  String aux_string;
1624  String aux_string_path;
1625  char buffer[1024];
1626 
1627  if (!getcwd(buffer, 1023))
1628  REPORT_ERROR(ERR_UNCLASSIFIED,"Cannot get the current directory");
1629  String path_str(buffer);
1630  path_str += "/";
1631  getValue(label, aux_string, firstRowId());
1632 
1633  if (aux_string[0] == '/')
1634  return;
1635 
1636  FileName auxFile;
1637  for (size_t id : this->ids())
1638  {
1639  aux_string_path = path_str;
1640  getValue(label, auxFile, id);
1641 
1642  if (auxFile.isInStack())
1643  {
1644  size_t id = auxFile.find('@',0);
1645  auxFile.insert(id+1,aux_string_path);
1646  setValue(label, auxFile, id);
1647  }
1648  else
1649  {
1650  auxFile.addPrefix(aux_string_path);
1651  setValue(label, auxFile, id);
1652  }
1653  }
1654 }
1655 
1656 void MetaDataDb::writeDB(const FileName fn, const FileName blockname, WriteModeMetaData mode) const
1657 {
1658  if(mode==MD_OVERWRITE)
1659  unlink(fn.c_str());
1660  myMDSql->copyTableToFileDB(blockname,fn);
1661 }
1662 
// Writes this metadata to `fn` as XML: one <blockname> element containing a
// <ROW .../> element per object, with one attribute per active label.
// Only MD_OVERWRITE is supported; any other mode reports
// ERR_NOT_IMPLEMENTED. The output file is truncated on open.
// NOTE(review): the stream is not checked after opening, and values are
// written through MDObject::toStream() without any XML-specific escaping
// visible here - confirm whether that is handled elsewhere.
1663 void MetaDataDb::writeXML(const FileName fn, const FileName blockname, WriteModeMetaData mode) const
1664 {
1665  //fixme
1667  if(mode!=MD_OVERWRITE)
1668  REPORT_ERROR(ERR_NOT_IMPLEMENTED,"XML is only implemented for overwrite mode");
1669  std::ofstream ofs(fn.c_str(), std::ios_base::out|std::ios_base::trunc);
1670  size_t size = this->_activeLabels.size();
      // Opening tag named after the block.
1671  ofs << "<" << blockname << ">"<< '\n';
1672  for (size_t id : this->ids())
1673  {
1674  ofs << "<ROW ";
1675  for (size_t i = 0; i < size; i++)
1676  {
      // MDL_STAR_COMMENT columns are not exported.
1677  if (this->_activeLabels[i] != MDL_STAR_COMMENT)
1678  {
      // Emit label="value" followed by a space.
1679  ofs << MDL::label2Str(this->_activeLabels[i]) << "=\"";
1680  MDObject mdValue(this->_activeLabels[i]);
1681  //ofs.width(1);
1682  myMDSql->getObjectValue(id, mdValue);
1683  mdValue.toStream(ofs, true);
1684  ofs << "\" ";
1685  }
1686  }
1687  ofs << " />" << '\n';
1688  }
      // Closing tag.
1689  ofs << "</" << blockname << ">"<< '\n';
1690 }
1691 
1692 void MetaDataDb::writeText(const FileName fn, const std::vector<MDLabel>* desiredLabels) const
1693 {
1694  std::ofstream ofs(fn.c_str(), std::ios_base::trunc|std::ios_base::out);
1695 
1696  if (desiredLabels != NULL)
1697  {
1698  MetaDataDb mdAux(*this);
1699  mdAux._activeLabels = *desiredLabels;
1700  mdAux._writeRows(ofs);
1701  }
1702  else
1703  _writeRows(ofs);
1704  ofs.close();
1705 }
1706 
1707 void MetaDataDb::metadataToVec(std::vector<MDRowSql> &vd)
1708 {
1709  for (const auto& row : *this)
1710  vd.emplace_back(dynamic_cast<const MDRowSql&>(row));
1711 }
1712 
1713 void MetaDataDb::vecToMetadata(const std::vector<MDRow> &rowMetadata)
1714 {
1715  const MDRowSql row;
1716 
1717  for (size_t i=0;i<rowMetadata.size();i++)
1718  this->addRow(rowMetadata[i]);
1719 }
1720 
1721 bool MetaDataDb::operator==(const MetaDataDb& op) const
1722 {
1723  return myMDSql->equals(*(op.myMDSql));
1724 }
1725 
1726 std::ostream& operator<<(std::ostream& o, const MetaData & mD)
1727 {
1728  mD.write(o);
1729  return o;
1730 }
object id (int), NOTE: This label is special and shouldn&#39;t be used
Argument missing.
Definition: xmipp_error.h:114
Just to locate unclassified errors.
Definition: xmipp_error.h:192
void selectRandomSubset(const MetaData &mdIn, size_t numberOfObjects, const MDLabel sortLabel=MDL_OBJID) override
No active object in MetaData.
Definition: xmipp_error.h:155
virtual bool isColumnFormat() const
void _selectSplitPart(const MetaDataDb &mdIn, int n, int part, size_t mdSize, const MDLabel sortLabel)
void addRows(const std::vector< MDRowSql > &rows)
void subtraction(const MetaDataDb &mdIn, const MDLabel label)
void _importObjectsDb(const MetaDataDb &md, const MDQuery &query, bool doClear=true)
int * nmax
void addPlain(const FileName &inFile, const String &labelsString, const String &separator=" ")
void _selectRandomSubset(const MetaDataDb &mdIn, size_t numberOfObjects, const MDLabel sortLabel=MDL_OBJID)
virtual bool empty() const =0
Case or algorithm not implemented yet.
Definition: xmipp_error.h:177
static MDLabel str2Label(const String &labelName)
bool bindValue(size_t id) const
bool operator==(const MetaDataDb &op) const
const int & getValue2(int) const
void selectSplitPart(const MetaData &mdIn, size_t n, size_t part, const MDLabel sortLabel=MDL_OBJID)
static bool insert(const std::vector< std::vector< const MDObject *>> &records, sqlite3 *db, const std::string &table)
Definition: sql_utils.cpp:258
String getBlockName() const
MDLabelType
virtual void clear()
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
#define END_OF_LINE()
Definition: metadata_base.h:69
bool getRowValues(size_t id, std::vector< MDObject > &values) const override
virtual void writeStar(const FileName &outFile, const String &blockName, WriteModeMetaData mode) const
void intersection(const MetaDataDb &mdIn, const MDLabel label)
void _writeRows(std::ostream &os) const override
size_t _parsedLines
doublereal * g
void clear() override
Global mmap error.
Definition: xmipp_error.h:170
void _setOperatesLabel(const MetaDataDb &mdIn, const MDLabel label, SetOperation operation)
void resizeNoCopy(const MultidimArray< T1 > &v)
void _readRowFormat(std::istream &is)
void findObjects(std::vector< size_t > &objectsOut, const MDQuery &query) const override
size_t addRow2(const MDRow &row)
void writeXML(const FileName fn, const FileName blockname, WriteModeMetaData mode) const override
void distinct(MetaDataDb &MDin, MDLabel label)
bool getValue(MDObject &mdValueOut, size_t id) const override
char * end
Definition: metadata_base.h:96
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
virtual void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const =0
void fillExpand(MDLabel label)
HBITMAP buffer
Definition: svm-toy.cpp:37
void trim(std::string &s)
Definition: text.cpp:205
int size() const override
size_t firstObject(const MDQuery &) const override
void unionDistinct(const MetaDataDb &mdIn, const MDLabel label=MDL_OBJID)
void finalizeAddRow(void)
size_t divide_equally(size_t N, size_t size, size_t rank, size_t &first, size_t &last)
virtual std::vector< MDLabel > labels() const =0
String WordWrap(const String &inputString, size_t lineLength)
void aggregateSingleInt(MDObject &mdValueOut, AggregateOperation op, MDLabel aggregateLabel)
void vecToMetadata(const std::vector< MDRow > &rowMetadata)
std::ostream & operator<<(std::ostream &o, const MetaData &mD)
void aggregateSingle(MDObject &mdValueOut, AggregateOperation op, MDLabel aggregateLabel)
bool initGetRow(bool addWhereClause) const
void copyMetadata(const MetaDataDb &md, bool copyObjects=true)
Definition: metadata_db.cpp:71
MDSql * myMDSql
Definition: metadata_db.h:62
Incorrect MultidimArray size.
Definition: xmipp_error.h:174
glob_prnt iter
Memory has not been deallocated.
Definition: xmipp_error.h:167
virtual IdIteratorProxy< false > ids()
bool isMetadataFile
#define FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(v)
String FileNameVersion
bool execGetRow(MDRow &row) const
FileName _inFile
Bad amount of memory requested.
Definition: xmipp_error.h:165
String _comment
void aggregateGroupBy(const MetaDataDb &mdIn, AggregateOperation op, const std::vector< MDLabel > &groupByLabels, MDLabel operateLabel, MDLabel resultLabel)
std::vector< String > StringVector
Definition: xmipp_strings.h:35
bool removeLabel(const MDLabel label) override
#define i
Incorrect number of objects in Metadata.
Definition: xmipp_error.h:160
Unique identifier for items inside a list or set (std::size_t)
void makeAbsPath(const MDLabel label=MDL_IMAGE)
void aggregate(const MetaDataDb &mdIn, AggregateOperation op, MDLabel aggregateLabel, MDLabel operateLabel, MDLabel resultLabel)
void readPlain(const FileName &inFile, const String &labelsString, const String &separator=" ")
void fillConstant(MDLabel label, const String &value) override
static MDLabelType labelType(const MDLabel label)
String getExtension() const
void copyColumnTo(MetaData &md, MDLabel labelDest, MDLabel labelSrc) override
static bool update(const std::vector< const MDObject *> &values, sqlite3 *db, const std::string &table, size_t id)
Definition: sql_utils.cpp:206
void importObject(const MetaData &md, const size_t id, bool doClear=true) override
std::unique_ptr< MDRow > getRow(size_t id) override
bool existsBlock(const FileName &_inFile)
glob_log first
double getColumnMin(MDLabel column)
#define DIRECT_A1D_ELEM(v, i)
void joinNatural(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight)
void * _memmem(const void *haystack, size_t haystack_len, const void *needle, size_t needle_len)
void join2(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight, const MDLabel labelLeft, const MDLabel labelRight, JoinType type=LEFT)
iterator end() override
Definition: metadata_db.h:749
MDLabel label
FileName addPrefix(const String &prefix) const
void readXML(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=NULL, const String &blockRegExp=DEFAULT_BLOCK_NAME, bool decomposeStack=true)
size_t countObjects(const MDQuery &query) const override
void randomize(const MetaDataDb &MDin)
virtual bool isEmpty() const
int removeObjects() override
void aggregateSingleSizeT(MDObject &mdValueOut, AggregateOperation op, MDLabel aggregateLabel)
int getMaxStringLength(const MDLabel thisLabel) const override
The label MDL_OBJID is special and should not be used.
void _clear(bool onlyData=false)
Definition: metadata_db.cpp:40
viol type
bool initAddRow(const MDRow &row)
size_t _maxRows
void getColumnValues(const MDLabel label, std::vector< MDObject > &valuesOut) const override
size_t addObject() override
size_t addRow(const MDRow &row) override
int in
void addRowOpt(const MDRowSql &row)
void _parseObjects(std::istream &is, std::vector< MDObject *> &columnValues, const std::vector< MDLabel > *desiredLabels, bool firstTime) override
int splitString(const String &input, const String &delimiter, StringVector &results, bool includeEmpties)
void setValue(const MDObject &object) override
Incorrect argument received.
Definition: xmipp_error.h:113
void addMissingLabels(const MDRow &row)
MetaDataDb & operator=(const MetaDataDb &md)
void _setOperates(const MetaDataDb &mdIn, const MDLabel label, SetOperation operation)
void operate(const String &expression)
friend class MDSql
Definition: metadata_db.h:61
virtual void clear()=0
void readDB(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=NULL, const String &blockRegExp=DEFAULT_BLOCK_NAME, bool decomposeStack=true)
bool getAllRows(std::vector< MDRowSql > &rows) const
#define SET_AND_FILL()
void finalizeGetRow(void) const
void fillLinear(MDLabel label, double initial, double step) override
MetaData error.
Definition: xmipp_error.h:154
void clear() override
Definition: metadata_db.cpp:54
bool getRow2(MDRow &row, size_t id) const
void removeItemId()
void copyInfo(const MetaData &md)
void addItemId()
void removeDuplicates(MetaDataDb &MDin, MDLabel label=MDL_UNDEFINED)
bool containsObject(size_t objectId) const override
void mode
void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const override
virtual size_t size() const =0
char * loop
Definition: metadata_base.h:97
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
bool exists() const
std::vector< MDObject > getObjectsForActiveLabels() const
void replace(const MDLabel label, const String &oldStr, const String &newStr)
void init(const std::vector< MDLabel > &labelsVector)
Definition: metadata_db.cpp:59
void sort(struct DCEL_T *dcel)
Definition: sorting.cpp:18
size_t size() const override
void addIndex(MDLabel label) const
void selectPart(const MetaData &mdIn, size_t startPosition, size_t numberOfObjects, const MDLabel sortLabel=MDL_OBJID) override
void setColumnValues(const std::vector< MDObject > &valuesIn) override
#define j
std::vector< MDLabel > _activeLabels
Definition: metadata_db.h:68
virtual int size() const =0
bool setRow(const MDRow &row, size_t id)
int trunc(double x)
Definition: ap.cpp:7248
bool addLabel(const MDLabel label, int pos=-1) override
void importObjects(const MetaData &md, const std::vector< size_t > &objectsToAdd, bool doClear=true) override
size_t firstRowId() const override
virtual ~MetaDataDb()
void sort(MetaDataDb &MDin, const MDLabel sortLabel, bool asc=true, int limit=-1, int offset=0)
virtual void setColumnFormat(bool column)
void setValue(MDLabel label, const T &d, bool addLabel=true)
String escapeForRegularExpressions(const String &str)
MetaData & operator=(const MetaData &md)
bool setValue(const MDObject &mdValueIn, size_t id) override
Definition: metadata_db.cpp:90
FileName removeBlockName() const
void merge(const MetaData &md2)
size_t lastRowId() const override
std::string String
Definition: xmipp_strings.h:34
virtual bool addLabel(const MDLabel label, int pos=-1)=0
void copyColumn(MDLabel labelDest, MDLabel labelSrc) override
#define line_max
comment is wraped in char_max length lines
String formatString(const char *format,...)
void setValue(const int &iv)
void toStream(std::ostream &os, bool withFormat=false, bool isSql=false, bool escape=true) const
void read(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=NULL, bool decomposeStack=true) override
int textToInteger(const char *str)
void join1(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight, const MDLabel label, JoinType type=LEFT)
void removeIndex(MDLabel label)
virtual std::vector< MDLabel > getActiveLabels() const =0
void unionAll(const MetaDataDb &mdIn)
void writeDB(const FileName fn, const FileName blockname, WriteModeMetaData mode) const
void setColumnValues(const MDLabel label, const std::vector< T > &valuesIn)
bool execAddRow(const MDRow &row)
void(* obj)()
Environment PATH cannot be read.
Definition: xmipp_error.h:143
static void str2LabelVector(const String &labelsStr, std::vector< MDLabel > &labels)
static String label2Str(const MDLabel &label)
MDRowVec firstRow(const FileName &fnMetadata)
void _selectPart(const MetaDataDb &mdIn, size_t startPosition, size_t numberOfObjects, const MDLabel sortLabel=MDL_OBJID)
bool setValueCol(const MDObject &mdValueIn) override
static bool select(size_t rowId, sqlite3 *db, const std::string &table, std::vector< MDObject > &values)
Definition: sql_utils.cpp:87
virtual void readStar(const FileName &filename, const std::vector< MDLabel > *desiredLabels, const String &blockRegExp, bool decomposeStack)
void renameColumn(MDLabel oldLabel, MDLabel newLabel) override
Incorrect value received.
Definition: xmipp_error.h:195
MDRowSql getRowSql(size_t id)
#define DEFAULT_BLOCK_NAME
Definition: metadata_base.h:60
WriteModeMetaData
int * n
void fillRandom(MDLabel label, const String &mode, double op1, double op2, double op3=0.) override
MDLabel
int _precision
bool removeObject(size_t id) override
void _readRowsStar(mdBlock &block, std::vector< MDObject *> &columnValues, const std::vector< MDLabel > *desiredLabels) override
void indexSort(MultidimArray< int > &indx) const
virtual FileName getFilename() const
static bool addColumns(const std::vector< MDLabel > &columns, sqlite3 *db, const std::string &table)
Definition: sql_utils.cpp:31
void metadataToVec(std::vector< MDRowSql > &vd)
virtual void _parseObject(std::istream &is, MDObject &object, size_t id=BAD_OBJID)
bool isInStack() const
void writeText(const FileName fn, const std::vector< MDLabel > *desiredLabels) const override
#define BAD_OBJID
Definition: metadata_base.h:55
void split(size_t n, std::vector< MetaDataDb > &results, const MDLabel sortLabel=MDL_OBJID)
bool containsLabel(const MDLabel label) const override
Definition: metadata_db.h:305
double getColumnMax(MDLabel column)
A comment for this object /*** NOTE THIS IS A SPECIAL CASE AND SO IS TREATED ***/.