Xmipp  v3.23.11-Nereus
training_vector.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Alberto Pascual Montano (pascual@cnb.csic.es)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 //-----------------------------------------------------------------------------
27 // ClassicTrainingVectors.cc
28 //-----------------------------------------------------------------------------
29 
30 #include <cmath>
31 #include <fstream>
32 
33 #include "training_vector.h"
34 #include <core/args.h>
35 #include <core/metadata_vec.h>
36 
47 {
48  try
49  {
50  clear();
51  readSelf(_is);
52  }
53  catch (std::exception& e)
54  {
55  std::ostringstream msg;
56  msg << e.what() << std::endl << "Error reading the training vector";
57  throw std::runtime_error(msg.str());
58  }
59 }
60 
61 
62 
68 {
69 
70  calibrated(op1.calibrated());
71 
72  for (size_t i = 0; i < op1.size(); i++)
73  if (calibrated())
74  add(op1.itemAt(i), op1.targetAt(i));
75  else
76  add(op1.itemAt(i));
77 
78  normalized = op1.normalized;
79  varStats = op1.varStats;
80 }
81 
82 
83 
84 #ifdef UNUSED // detected as unused 29.6.2018
85 
88 unsigned ClassicTrainingVectors::featureSize() const
89 {
90  return itemAt(0).size();
91 }
92 #endif
93 
98 {
99  return itemAt(0).size();
100 }
101 
106 {
 // Resets the normalization state: empties the per-variable statistics and
 // clears the normalized flag.
 // NOTE(review): the listing jumps from line 106 to 108, so one statement
 // appears to have been lost in extraction - confirm against the real file.
108  varStats.clear();
109  normalized = false;
110 }
111 
/**
 * Writes the header line of the set to _os: dimension(), a single space,
 * the number of stored items and a line break.
 *
 * NOTE(review): the listing skips line 120, so a statement that followed
 * the header output appears to be missing here - confirm against the real
 * file.
 *
 * @param _os output stream to write to
 */
117 void ClassicTrainingVectors::printSelf(std::ostream& _os) const
118 {
119  _os << dimension() << " " << theItems.size() << std::endl;
121 }
122 
130 {
131 #ifndef _NO_EXCEPTION
132  try
133  {
134 #endif
135  clear();
136  std::string line;
137 
138  // Determines the number of rows and columns in the training set
139 
140  long dim;
141  long size;
142  _is >> dim;
143  _is >> line;
144  if (!sscanf(line.c_str(), "%ld", &size))
145  {
146  int x;
147  int y;
148  _is >> x;
149  _is >> y;
150  size = x * y;
151  }
152  getline(_is, line);
153  theItems.resize(size);
154  theTargets.resize(size);
155 
156  for (int i = 0; i < size; i++)
157  {
158  std::vector<floatFeature> v;
159  v.resize(dim);
160  for (int j = 0; j < dim; j++)
161  {
162  floatFeature var;
163  _is >> var;
164  v[j] = var;
165  }
166  getline(_is, line);
167  theItems[i] = v;
168  theTargets[i] = removeSpaces(line);
169  }
170 
171 #ifndef _NO_EXCEPTION
172  }
173  catch (std::exception& e)
174  {
175  std::ostringstream msg;
176  msg << e.what() << std::endl << "Error reading the training set";
177  throw std::runtime_error(msg.str());
178  }
179 #endif
180 }
181 
183 {
 // Loads the set from a metadata file plus a raw companion file.
 // The blocks "vectorHeader@<fnIn>" and "vectorContent@<fnIn>" supply the
 // vector length, the vector count and, per vector, an image name and an
 // order index; the float values themselves come from "<fnIn without
 // extension>.vec", read in binary mode.
184  clear();
185 
186  // Read header and content
187  MetaDataVec vectorHeader(formatString("vectorHeader@%s",fnIn.c_str()));
188  MetaDataVec vectorContent(formatString("vectorContent@%s",fnIn.c_str()));
189  size_t Nvectors;
190  size_t vectorSize;
191  size_t id = vectorHeader.firstRowId();
192  vectorHeader.getValue(MDL_CLASSIFICATION_DATA_SIZE,vectorSize,id);
193  vectorHeader.getValue(MDL_COUNT,Nvectors,id);
194  theItems.reserve(Nvectors);
195  theTargets.reserve(Nvectors);
196 
197  // Read the data
198  FileName fnInRaw=formatString("%s.vec",fnIn.withoutExtension().c_str());
199  std::ifstream fhInRaw(fnInRaw.c_str(),std::ios::binary);
200  if (!fhInRaw)
201  REPORT_ERROR(ERR_IO_NOTEXIST,fnInRaw);
202  std::vector<floatFeature> v;
203  v.resize(vectorSize);
 // NOTE(review): raw new[] released only by the delete[] at the end of the
 // function; if REPORT_ERROR leaves the function (presumably by throwing -
 // confirm) the buffer is never freed.
204  auto *buffer=new float[vectorSize];
205  String fnImg;
206  size_t order;
207 
208  for (size_t objId : vectorContent.ids())
209  {
210  vectorContent.getValue(MDL_IMAGE, fnImg, objId);
211  vectorContent.getValue(MDL_ORDER, order, objId);
212 
213  // Read raw values
 // Vector number `order` starts at byte order*vectorSize*sizeof(float).
214  fhInRaw.seekg(order*vectorSize*sizeof(float));
215  fhInRaw.read((char*)buffer,vectorSize*sizeof(float));
216  if (!fhInRaw)
 // NOTE(review): the listing skips line 217; the opening of the statement
 // that consumes this formatString(...) call (note the extra closing
 // parenthesis below) was lost in extraction - confirm against the real file.
218  formatString("Could not read image %lu from %s",
219  order,fnInRaw.c_str()));
 // Copy the raw floats into the feature vector and store it together with
 // the image name as its target.
220  for (size_t i=0; i<vectorSize; ++i)
221  v[i]=buffer[i];
222  theTargets.push_back(fnImg);
223  theItems.push_back(v);
224  }
225  delete []buffer;
226 }
227 
/**
 * Writes the normalization state of the set to _os, one value per line:
 * dimension(), the normalized flag and, only when the set is normalized,
 * the mean and sd of every entry in varStats.
 *
 * NOTE(review): the listing skips line 243, so a statement that followed
 * the statistics output appears to be missing here - confirm against the
 * real file.
 *
 * @param _os output stream to write to
 */
233 void ClassicTrainingVectors::saveObject(std::ostream& _os) const
234 {
235  _os << dimension() << std::endl;
236  _os << normalized << std::endl;
237  if (normalized)
238  for (size_t i = 0; i < varStats.size(); i++)
239  {
240  _os << varStats[i].mean << std::endl;
241  _os << varStats[i].sd << std::endl;
242  }
244 }
245 
246 
253 {
 // Restores the normalization state from _is: reads the dimension and the
 // normalized flag, then one (mean, sd) pair per variable into varStats.
254  clear();
255  int dim;
256  _is >> dim;
257  _is >> normalized;
 // NOTE(review): without braces only varStats.clear() depends on the flag;
 // the resize and the read loop below run even when `normalized` is false,
 // whereas saveObject writes the (mean, sd) pairs only when it is true.
 // Looks like a missing pair of braces - confirm intent.
258  if (normalized)
259  varStats.clear();
260  varStats.resize(dim);
261  for (size_t i = 0; i < varStats.size(); i++)
262  {
263  _is >> varStats[i].mean;
264  _is >> varStats[i].sd;
265  }
 // NOTE(review): the listing skips line 266, so a statement that followed
 // the loop appears to have been lost in extraction - confirm against the
 // real file.
267 }
268 
269 
270 #ifdef UNUSED // detected as unused 29.6.2018
271 
276 void ClassicTrainingVectors::deleteVariable(int _var)
277 {
278  for (unsigned int it = 0; it < size(); it++)
279  itemAt(it).erase(itemAt(it).begin() + _var);
280 }
281 #endif
282 
288 {
289 
290  // This avoids memory leakage in assignments like v=v
291  if (&op1 != this)
292  {
293 
294  calibrated(op1.calibrated());
295 
296  for (size_t i = 0; i < op1.size(); i++)
297  if (calibrated())
298  add(op1.itemAt(i), op1.targetAt(i));
299  else
300  add(op1.itemAt(i));
301 
302  normalized = op1.normalized;
303  varStats = op1.varStats;
304  }
305  return *this;
306 }
307 
308 #ifdef UNUSED // detected as unused 29.6.2018
309 
314 bool ClassicTrainingVectors::copyStructure(ClassicTrainingVectors& _ts)
315 {
316 
317  // check if set is just initialized but empty
318 
319  if ((&_ts == this) || (size() + itemAt(0).size() != 0)) return false;
320  calibrated(_ts.calibrated());
321  normalized = _ts.normalized;
322  varStats = _ts.varStats;
323  return true;
324 }
325 
332 bool ClassicTrainingVectors::insertRowFrom(ClassicTrainingVectors& _ts, unsigned int _idx)
333 {
334 
335  // just some validation, but not complete
336 
337  if (((&_ts == this) || (_idx > _ts.size())) ||
338  (itemAt(0).size() != _ts.itemAt(0).size()))
339  return false;
340 
341  if (calibrated())
342  add(_ts.itemAt(_idx), _ts.targetAt(_idx));
343  else
344  add(_ts.itemAt(_idx));
345  return true;
346 
347 }
348 
352 bool ClassicTrainingVectors::deleteRow(unsigned int _idx)
353 {
354  return remove(_idx);
355 }
356 #endif
357 
363 {
364  using namespace std;
365 
366  // Do some validation
367 
368  if (_i > itemAt(0).size())
369  {
370  std::ostringstream msg;
371  msg << "Out of range. No variable at position " << _i;
372  throw std::out_of_range(msg.str());
373  }
374 
375  // first calculates the mean
376  floatFeature mean = 0;
377  int nn = 0;
378  for (size_t it = 0; it < size(); it++)
379  {
380  if (!isnan(itemAt(it)[_i]))
381  {
382  mean += itemAt(it)[_i];
383  nn++;
384  }
385 
386  }
387  mean /= (floatFeature) nn;
388 
389  // Then calculates SD
390  floatFeature sd = 0;
391  for (size_t it = 0; it < size(); it++)
392  {
393  if (!isnan(itemAt(it)[_i]))
394  sd += (itemAt(it)[_i] - mean) * (itemAt(it)[_i] - mean);
395  }
396  sd = sqrt(sd / (floatFeature)(nn - 1));
397 
398  // Now normalize the variable
399  if (sd != 0)
400  {
401  for (size_t it = 0; it < size(); it++)
402  {
403  if (!isnan(itemAt(it)[_i]))
404  itemAt(it)[_i] = (itemAt(it)[_i] - mean) / sd;
405  }
406  }
407 
408  varStats[_i].mean = mean;
409  varStats[_i].sd = sd;
410 }
411 
412 
418 {
 // Rebuilds varStats with one entry per component of the first item and
 // marks the set as normalized.
419  varStats.clear();
420  varStats.resize(itemAt(0).size());
421  for (unsigned i = 0; i < itemAt(0).size(); i++)
 // NOTE(review): the listing skips line 422, i.e. the statement that forms
 // the body of the loop above was lost in extraction; as printed, the
 // assignment below would become the loop body - confirm against the real
 // file.
423  normalized = true;
424 }
425 
426 
432 {
433  using namespace std;
434  for (unsigned it = 0; it < size(); it++)
435  {
436  for (unsigned i = 0; i < itemAt(0).size(); i++)
437  {
438  if (!isnan(itemAt(it)[i]))
439  itemAt(it)[i] = itemAt(it)[i] * varStats[i].sd + varStats[i].mean;
440  }
441  }
442  varStats.clear();
443  normalized = false;
444 }
445 
446 #ifdef UNUSED // detected as unused 29.6.2018
447 
451 double ClassicTrainingVectors::getUnormalizedVar(unsigned _item, unsigned _var) const
452 {
453  using namespace std;
454  if (!normalized)
455  {
456  std::ostringstream msg;
457  msg << "Variable is not normalized" << _var;
458  throw std::runtime_error(msg.str());
459  }
460 
461  if (_var > itemAt(0).size())
462  {
463  std::ostringstream msg;
464  msg << "Out of range. No variable at position " << _var;
465  throw std::out_of_range(msg.str());
466  }
467 
468  if (_item > size())
469  {
470  std::ostringstream msg;
471  msg << "Out of range. No item at position " << _var;
472  throw std::out_of_range(msg.str());
473  }
474 
475  double t;
476  if (!isnan(itemAt(_item)[_var]))
477  t = (double) itemAt(_item)[_var] * varStats[_var].sd + varStats[_var].mean;
478 
479  return t;
480 }
481 #endif
482 
488 {
 // Reports the current value of the `normalized` flag.
489  return normalized;
490 }
491 
492 
496 /* const std::vector<ClassicTrainingVectors::statsStruct>& ClassicTrainingVectors::getNormalizationInfo() const {
497  return varStats;
498  }*/
499 
500 
506 {
507  using namespace std;
508 
509  // Do some validation
510  if (_i > itemAt(0).size())
511  {
512  std::ostringstream msg;
513  msg << "Out of range. No variable at position " << _i;
514  throw std::out_of_range(msg.str());
515  }
516 
517  // first calculates the mean
518  _mean = 0;
519  int nn = 0;
520  for (size_t it = 0; it < size(); it++)
521  {
522  if (!isnan(itemAt(it)[_i]))
523  {
524  _mean += itemAt(it)[_i];
525  nn++;
526  }
527 
528  }
529  _mean /= (floatFeature) nn;
530 
531  // Then calculates SD
532  _sd = 0;
533  for (size_t it = 0; it < size(); it++)
534  {
535  if (!isnan(itemAt(it)[_i]))
536  _sd += (itemAt(it)[_i] - _mean) * (itemAt(it)[_i] - _mean);
537  }
538  _sd = sqrt(_sd / (floatFeature)(nn - 1));
539 }
540 
541 #ifdef UNUSED // detected as unused 29.6.2018
542 
546 ClassicTrainingVectors ClassicTrainingVectors::getStatVector()
547 {
548  ClassicTrainingVectors myStatVector;
549  myStatVector.theItems.resize(2);
550  myStatVector.theItems[0].resize(itemAt(0).size(), 0);
551  myStatVector.theItems[1].resize(itemAt(0).size(), 0);
552  myStatVector.theTargets.resize(2);
553  for (unsigned i = 0; i < itemAt(0).size(); i++)
554  getFeatureStats(i, myStatVector.theItems[0][i], myStatVector.theItems[1][i]);
555  myStatVector.theTargets[0] = "Average ";
556  myStatVector.theTargets[1] = "SD ";
557  return myStatVector;
558 }
559 #endif
560 
561 
562 //-----------------------------------------------------------------------------
563 
String removeSpaces(const String &_str)
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
void getFeatureStats(unsigned _i, floatFeature &_mean, floatFeature &_sd)
void sqrt(Image< double > &op)
HBITMAP buffer
Definition: svm-toy.cpp:37
static double * y
virtual void saveObject(std::ostream &_os) const
ClassicTrainingVectors(unsigned _vecSize=0, bool _calib=true)
void read(const FileName &fnIn)
float floatFeature
Definition: data_types.h:72
const Target & targetAt(unsigned _i) const
Definition: training_set.h:221
virtual IdIteratorProxy< false > ids()
ClassicTrainingVectors & operator=(const ClassicTrainingVectors &op1)
doublereal * x
#define i
virtual void printSelf(std::ostream &_os) const
Definition: training_set.h:330
std::vector< statsStruct > varStats
virtual void loadObject(std::istream &_is)
Number of elements of a type (int) [this is a generic type do not use to transfer information to ano...
unsigned dimension() const
virtual void readSelf(std::istream &_is)
size_t firstRowId() const override
virtual void saveObject(std::ostream &_os) const
Definition: training_set.h:349
Size of data vectors for classification (int)
File or directory does not exist.
Definition: xmipp_error.h:136
Couldn't read from file.
Definition: xmipp_error.h:139
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
virtual void normalizeFeature(unsigned _i)
#define j
virtual void printSelf(std::ostream &_os) const
bool getValue(MDObject &mdValueOut, size_t id) const override
FileName withoutExtension() const
std::string String
Definition: xmipp_strings.h:34
String formatString(const char *format,...)
virtual void loadObject(std::istream &_is)
Definition: training_set.h:360
const Item & itemAt(unsigned _i) const
Definition: training_set.h:264
Name of an image (std::string)
virtual void add(const FeatureVector &_i, const Label &_tg)
Definition: training_set.h:171