Xmipp  v3.23.11-Nereus
training_set.h
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Alberto Pascual Montano (pascual@cnb.csic.es)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 //-----------------------------------------------------------------------------
27 // ClassificationTrainingSet.hh
28 // Xmipp Classification Training Sets
29 //-----------------------------------------------------------------------------
30 
31 #ifndef XMIPPTS_H
32 #define XMIPPTS_H
33 
34 #include <cctype>
35 #include <vector>
36 #include <map>
37 #include <sstream>
38 #include <stdexcept>
39 
40 #include "data_types.h"
41 #include "vector_ops.h"
42 
43 #include <core/args.h>
44 
60 template<class Item, class Target>
62 {
63 public:
64 
/// Ordered multimap from a use mode (key) to the index of an item assigned to it.
using splitTS = std::multimap<unsigned, unsigned, std::less<unsigned> >;

/// Iterator over the entries of a splitTS.
using splitIt = splitTS::iterator;

/// Use a sample can be put to once the set has been split.
enum useMode { TRAIN = 0, VALIDATION = 1, TEST = 2 };
76 
77 protected:
// An earlier protected declaration of the two containers is kept below, commented out;
// the live declarations are the public ones further down.
80  /* std::vector<Item> theItems;
81  std::vector<Target> theTargets;*/
// Target count: reset to 0 by clear() and returned by numTargets().
82  unsigned nTargets; // Number of targets in the training set
83 public:
// Stored items; size() reports the length of this container.
84  std::vector<Item> theItems;
// Stored targets; targetAt() indexes this container with the same position used for theItems.
85  std::vector<Target> theTargets;
86 
92  ClassificationTrainingSet(const bool& _calib = true, unsigned _n = 0)
93  :isCalibrated(_calib), /* splitTrainingSet(), */nTargets(0), theItems(_n) /*theTargets(_n), */
94  {};
95 
// Builds a training set from a stream: starts uncalibrated with empty containers,
// then delegates the actual parsing to loadObject().
// NOTE(review): loadObject() is virtual; when called from this constructor it resolves to
// this class's version, not to an override in a derived class.
101  ClassificationTrainingSet(std::istream & _is) : isCalibrated(false), splitTrainingSet(), theItems(), theTargets()
102  {
103  loadObject(_is);
105  };
106 
111  {};
112 
120  void setSplit(float _tp, float _vp)
121  {
122  if ((_tp > 1) || (_tp <= 0) || (_vp > 1) || (_vp <= 0) || (_tp + _vp > 1))
123  {
124  throw std::invalid_argument("Split mode proportions must be < 1");
125  }
126  std::vector<float> acc(2); // 3 modes
127  acc[TRAIN] = _tp;
128  acc[VALIDATION] = _tp + _vp;
129  std::random_device rd; //Will be used to obtain a seed for the random number engine
130  std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd()
131  std::uniform_real_distribution<> dist(0.f, 1.f);
132  for (unsigned i = 0; i < size(); i ++)
133  {
134  float aRnd = dist(gen);
135  unsigned rw = TEST;
136  for (unsigned j = TRAIN; j < TEST; j ++)
137  {
138  if (aRnd < acc[j])
139  {
140  rw = j;
141  break;
142  }
143  }
144  splitTrainingSet.insert(std::pair<unsigned, unsigned>(rw, i));
145  }
146  }
147 
148 
149 
154  splitIt beginSubset(unsigned _um)
155  {
156  return splitTrainingSet.lower_bound(_um);
157  }
158 
160  splitIt endSubset(unsigned _um)
161  {
162  return splitTrainingSet.upper_bound(_um);
163  }
164 
165 
171  virtual void add(const Item& _i, const Target& _tg/* = Target()*/)
172  {
173  theItems.push_back(_i);
174  theTargets.push_back(_tg);
175  };
176 
177 
178 
183  virtual void add(const Item& _i)
184  {
185  theItems.push_back(_i);
186  };
187 
188 
193  virtual bool remove(unsigned int _idx)
194  {
195 
196  if (_idx > theItems.size())
197  return false;
198 
199  theItems.erase(theItems.begin() + _idx);
200  if (isCalibrated)
201  theTargets.erase(theTargets.begin() + _idx);
202 
203  return true;
204  };
205 
206 
210  size_t size() const
211  {
212  return theItems.size();
213  };
214 
215 
221  const Target& targetAt(unsigned _i) const
222  {
223  if (!isCalibrated)
224  {
225  std::string msg;
226  msg = "The training set is not calibrated.";
227  throw std::out_of_range(msg);
228  }
229  if (_i >= size())
230  {
231  std::string msg;
232  msg = "Out of range. No target at position " + integerToString(_i);
233  throw std::out_of_range(msg);
234  }
235 
236  return theTargets[_i];
237  };
238 
239 
245  Target& targetAt(unsigned _i)
246  {
247  if (_i >= size())
248  {
249  std::string msg;
250  msg = "Out of range. No target at position " + integerToString(_i);
251  throw std::out_of_range(msg);
252  }
253 
254  return theTargets[_i];
255  };
256 
257 
258 
264  const Item& itemAt(unsigned _i) const
265  {
266  if (_i >= size())
267  {
268  std::string msg;
269  msg = "Out of range. No item at position " + integerToString(_i);
270  throw std::out_of_range(msg);
271  }
272 
273  return theItems[_i];
274  };
275 
276 
277 
284  Item& itemAt(unsigned _i)
285  {
286  if (_i >= size())
287  {
288  std::string msg;
289  msg = "Out of range. No item at position " + integerToString(_i);
290  throw std::out_of_range(msg);
291  }
292 
293  return theItems[_i];
294  };
295 
296 
297 
301  bool calibrated() const
302  {
303  return isCalibrated;
304  }
305 
306 
310  void calibrated(const bool& _calib)
311  {
312  isCalibrated = _calib;
313  };
314 
318  void clear()
319  {
320  theItems.clear();
321  theTargets.clear();
322  nTargets = 0;
323  };
324 
325 
330  virtual void printSelf(std::ostream& _os) const
331  {
332  writeItems(_os);
333  }
334 
341  virtual void readSelf(std::istream& _is)
342  {}
343 
349  virtual void saveObject(std::ostream& _os) const
350  {
351  writeCalibrated(_os);
352  writeItems(_os, true);
353  }
354 
360  virtual void loadObject(std::istream& _is)
361  {
362  clear();
363  // first of all, check if the training set is calibrated or not
364  checkCalibrated(_is);
365  //afterwards, we have to read the item
366  readItems(_is);
367  }
368 
373  unsigned numTargets() const
374  {
375  if (!calibrated())
376  throw std::runtime_error("TS not calibrated");
377  return nTargets;
378  }
379 
383  virtual bool swapItems(unsigned _i, unsigned _j)
384  {
385  if (_i > size() || _j > size() || _i == _j)
386  return false;
387  swap(theItems[_i], theItems[_j]);
388  if (isCalibrated)
389  swap(theTargets[_i], theTargets[_j]);
390  return true;
391  }
392 
397  friend std::ostream& operator << (std::ostream& _os, const ClassificationTrainingSet& _ts)
398  {
399  _ts.printSelf(_os);
400  return _os;
401  }
402 
410  {
411  _ts.readSelf(_is);
412  return _is;
413  }
414 
415 protected:
416 
// NOTE(review): the signature line that belongs in front of this body is not present in
// this listing. The body recomputes nTargets from the stored targets.
// NOTE(review): std::set is used here but <set> is not among the headers included directly
// at the top of the file; presumably it arrives transitively -- verify.
421  {
422  if (calibrated())
423  {
// An ordered set keeps one copy of each distinct target value.
424  typedef std::set< Target, std::less<Target> > STB;
425  STB targetSet;
426  for (unsigned i = 0; i < size(); i ++)
427  {
428  targetSet.insert(targetAt(i));
429  }
430  // Assign the number of targets
431  nTargets = targetSet.size();
432  }
433  else
// An uncalibrated set is given a target count of 0.
434  nTargets = 0;
435  }
436 
443  {
444  skipComments(_is);
445 
446  if (_is)
447  {
448  std::string s;
449  _is >> s;
450 
451  // Comments skipped, read calibrated
452  if (_is)
453  {
454  std::string s2 = "";
455 
456  // uppercase s
457  for (std::string::iterator i = s.begin() ; i != s.end() ; i++)
458  s2 += toupper(*i);
459 
460  if (s2 == "CALIBRATED")
461  isCalibrated = true;
462  else
463  for (std::string::iterator i = s.end() ; i > s.begin() ; _is.putback(*(--i)));
464  }
465  }
466 
467  if (!_is)
468  {
469  std::string msg;
470  msg = "Error reading the file";
471  throw std::runtime_error(msg);
472  }
473  };
474 
475 
482  {
483  while (_is)
484  {
485  skipComments(_is);
486  if (_is)
487  {
488  Item item;
489  try
490  {
491  _is >> item;
492  }
493  catch (std::exception&)
494  {
495  std::string msg;
496  msg = "Error reading the item";
497  throw std::runtime_error(msg);
498  }
499 
500  if (_is)
501  {
502  theItems.push_back(item);
503  if (isCalibrated)
504  {
505  Target target;
506  try
507  {
508  char c;
509  if (_is) _is >> c;
510  // go back to the beginning of the line
511  if (_is) _is.putback(c);
512  // check for next line "<"
513  if (c == '<')
514  target = Target();
515  else
516  if (_is)
517  _is >> target;
518  else
519  target = Target();
520  }
521  catch (std::exception&)
522  {
523  std::string msg;
524  msg = "Error reading the item";
525  throw std::runtime_error(msg);
526  }
527 
528  theTargets.push_back(target);
529  }
530  else
531  theTargets.push_back(Target());
532  }
533  else
534  {
535  std::string msg;
536  msg = "Error reading the item";
537  throw std::runtime_error(msg);
538  }
539  }
540  }
541  };
542 
543 
544 
549  void writeCalibrated(std::ostream& _os) const
550  {
551  if (isCalibrated)
552  _os << "calibrated" << std::endl;
553  };
554 
555 
561  void writeItems(std::ostream& _os, bool _delim = false) const
562  {
563  typename std::vector<Item>::const_iterator i;
564  typename std::vector<Target>::const_iterator j;
565  for (i = theItems.begin(), j = theTargets.begin() ; i < theItems.end() ;
566  i++, j++)
567  {
568 
569  if (_delim)
570  _os << *i;
571  else
572  {
573  for (size_t d = 0; d < (*i).size(); d++)
574  {
575  _os << (*i)[d];
576  if (d != (*i).size() - 1) _os << " ";
577  }
578  }
579 
580  /* if (isCalibrated) {
581  if ((*j == " " || *j == "") && _delim)
582  _os << "|";
583  else
584  _os << " " << *j;
585  }*/
586  if (isCalibrated)
587  _os << " " << *j;
588 
589  _os << std::endl;
590  }
591  };
592 
593 
598  void skipComments(std::istream& _is) const
599  {
600  char c;
601  if (_is)
602  _is >> c;
603 
604  // check for comments
605  while (_is && c == '#')
606  {
607  // skip the comment
608  while (_is && _is.get() != '\n');
609 
610  // read beginning of next line
611  if (_is)
612  _is >> c;
613  }
614 
615  // go back to the beginning of the line
616  if (_is)
617  _is.putback(c);
618  };
619 
620 
624  typename std::vector<Item>::const_iterator itemsBegin() const
625  {
626  return theItems.begin();
627  }
628 
629 
633  typename std::vector<Item>::const_iterator itemsEnd() const
634  {
635  return theItems.end();
636  }
637 
638 
643  typename std::vector<Target>::const_iterator targetsBegin() const
644  {
645  return theTargets.begin();
646  }
647 
651  typename std::vector<Target>::const_iterator targetsEnd() const
652  {
653  return theTargets.end();
654  }
655 };
657 #endif
useMode
use of samples
Definition: training_set.h:75
void checkCalibrated(std::istream &_is)
Definition: training_set.h:442
splitIt beginSubset(unsigned _um)
Definition: training_set.h:154
std::vector< Item >::const_iterator itemsBegin() const
Definition: training_set.h:624
doublereal * c
splitMode
Ways the training set can be used.
Definition: training_set.h:72
friend std::ostream & operator<<(std::ostream &_os, const ClassificationTrainingSet &_ts)
Definition: training_set.h:397
Target & targetAt(unsigned _i)
Definition: training_set.h:245
virtual void add(const Item &_i)
Definition: training_set.h:183
splitTS::iterator splitIt
iterator
Definition: training_set.h:69
virtual bool swapItems(unsigned _i, unsigned _j)
Definition: training_set.h:383
void setSplit(float _tp, float _vp)
Definition: training_set.h:120
String integerToString(int I, int _width, char fill_with)
const Target & targetAt(unsigned _i) const
Definition: training_set.h:221
friend std::istream & operator>>(std::istream &_is, ClassificationTrainingSet &_ts)
Definition: training_set.h:409
#define i
virtual void printSelf(std::ostream &_os) const
Definition: training_set.h:330
doublereal * d
virtual void readSelf(std::istream &_is)
Definition: training_set.h:341
virtual ~ClassificationTrainingSet()
Definition: training_set.h:110
double * f
std::vector< Item > theItems
Definition: training_set.h:84
Item & itemAt(unsigned _i)
Definition: training_set.h:284
std::vector< Target >::const_iterator targetsEnd() const
Definition: training_set.h:651
virtual void saveObject(std::ostream &_os) const
Definition: training_set.h:349
unsigned numTargets() const
Definition: training_set.h:373
void readItems(std::istream &_is)
Definition: training_set.h:481
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
void writeItems(std::ostream &_os, bool _delim=false) const
Definition: training_set.h:561
std::vector< Target > theTargets
Definition: training_set.h:85
#define j
std::vector< Item >::const_iterator itemsEnd() const
Definition: training_set.h:633
splitIt endSubset(unsigned _um)
Returns an iterator to the end of the subset.
Definition: training_set.h:160
void writeCalibrated(std::ostream &_os) const
Definition: training_set.h:549
void skipComments(std::istream &_is) const
Definition: training_set.h:598
void calibrated(const bool &_calib)
Definition: training_set.h:310
virtual void loadObject(std::istream &_is)
Definition: training_set.h:360
const Item & itemAt(unsigned _i) const
Definition: training_set.h:264
std::vector< Target >::const_iterator targetsBegin() const
Definition: training_set.h:643
ClassificationTrainingSet(std::istream &_is)
Definition: training_set.h:101
virtual void add(const Item &_i, const Target &_tg)
Definition: training_set.h:171
std::multimap< unsigned, unsigned, std::less< unsigned > > splitTS
Training sets mode.
Definition: training_set.h:66
ClassificationTrainingSet(const bool &_calib=true, unsigned _n=0)
Definition: training_set.h:92