Xmipp  v3.23.11-Nereus
Classes | Public Types | Public Member Functions | Protected Attributes | List of all members

#include <training_vector.h>

Inheritance diagram for ClassicTrainingVectors:
Inheritance graph
[legend]
Collaboration diagram for ClassicTrainingVectors:
Collaboration graph
[legend]

Classes

struct  stats
 

Public Types

typedef struct ClassicTrainingVectors::stats statsStruct
 
- Public Types inherited from ClassificationTrainingSet< FeatureVector, Label >
enum  splitMode
 Ways the training set can be used. More...
 
enum  useMode
 use of samples More...
 
typedef std::multimap< unsigned, unsigned, std::less< unsigned > > splitTS
 Training sets mode. More...
 
typedef splitTS::iterator splitIt
 iterator More...
 

Public Member Functions

 ClassicTrainingVectors (unsigned _vecSize=0, bool _calib=true)
 
 ClassicTrainingVectors (std::istream &_is)
 
 ClassicTrainingVectors (const ClassicTrainingVectors &op1)
 
virtual ~ClassicTrainingVectors ()
 
unsigned dimension () const
 
void clear ()
 
virtual void printSelf (std::ostream &_os) const
 
virtual void readSelf (std::istream &_is)
 
void read (const FileName &fnIn)
 
virtual void saveObject (std::ostream &_os) const
 
virtual void loadObject (std::istream &_is)
 
ClassicTrainingVectors & operator= (const ClassicTrainingVectors &op1)
 
virtual void normalizeFeature (unsigned _i)
 
void normalize ()
 
virtual void unNormalize ()
 
bool isNormalized () const
 
virtual const std::vector< statsStruct > & getNormalizationInfo () const
 
void getFeatureStats (unsigned _i, floatFeature &_mean, floatFeature &_sd)
 
- Public Member Functions inherited from ClassificationTrainingSet< FeatureVector, Label >
 ClassificationTrainingSet (const bool &_calib=true, unsigned _n=0)
 
 ClassificationTrainingSet (std::istream &_is)
 
virtual ~ClassificationTrainingSet ()
 
void setSplit (float _tp, float _vp)
 
splitIt beginSubset (unsigned _um)
 
splitIt endSubset (unsigned _um)
 Returns an iterator to the end of the subset. More...
 
virtual void add (const FeatureVector &_i, const Label &_tg)
 
virtual void add (const FeatureVector &_i)
 
virtual bool remove (unsigned int _idx)
 
size_t size () const
 
const Label & targetAt (unsigned _i) const
 
Label & targetAt (unsigned _i)
 
const FeatureVector & itemAt (unsigned _i) const
 
FeatureVector & itemAt (unsigned _i)
 
bool calibrated () const
 
void calibrated (const bool &_calib)
 
void clear ()
 
unsigned numTargets () const
 
virtual bool swapItems (unsigned _i, unsigned _j)
 

Protected Attributes

std::vector< statsStruct > varStats
 
bool normalized
 
- Protected Attributes inherited from ClassificationTrainingSet< FeatureVector, Label >
bool isCalibrated
 
splitTS splitTrainingSet
 
unsigned nTargets
 

Additional Inherited Members

- Public Attributes inherited from ClassificationTrainingSet< FeatureVector, Label >
std::vector< FeatureVector > theItems
 
std::vector< Label > theTargets
 
- Protected Member Functions inherited from ClassificationTrainingSet< FeatureVector, Label >
void computeNumTargets ()
 
void checkCalibrated (std::istream &_is)
 
void readItems (std::istream &_is)
 
void writeCalibrated (std::ostream &_os) const
 
void writeItems (std::ostream &_os, bool _delim=false) const
 
void skipComments (std::istream &_is) const
 
std::vector< FeatureVector >::const_iterator itemsBegin () const
 
std::vector< FeatureVector >::const_iterator itemsEnd () const
 
std::vector< Label >::const_iterator targetsBegin () const
 
std::vector< Label >::const_iterator targetsEnd () const
 

Detailed Description

This class implements all the necessary functionality for classic training vectors.

Definition at line 46 of file training_vector.h.

Member Typedef Documentation

◆ statsStruct

Constructor & Destructor Documentation

◆ ClassicTrainingVectors() [1/3]

ClassicTrainingVectors::ClassicTrainingVectors ( unsigned  _vecSize = 0,
bool  _calib = true 
)
inline

Default constructor. Parameter: _vecSize Vector dimension; required to dimension the feature and types vectors. Parameter: _calib Calibration flag; should be true if the data set has labels.

Definition at line 63 of file training_vector.h.

◆ ClassicTrainingVectors() [2/3]

ClassicTrainingVectors::ClassicTrainingVectors ( std::istream &  _is)

Constructs a training set given a stream Parameter: _is The input stream

Exceptions
runtime_error If there are problems with the stream

TrainingSet for ClassicTrainingVectors Constructs a training set given a stream Parameter: _is The input stream

Exceptions
runtime_error If there are problems with the stream

Definition at line 46 of file training_vector.cpp.

47 {
48  try
49  {
50  clear();
51  readSelf(_is);
52  }
53  catch (std::exception& e)
54  {
55  std::ostringstream msg;
56  msg << e.what() << std::endl << "Error reading the training vector";
57  throw std::runtime_error(msg.str());
58  }
59 }
virtual void readSelf(std::istream &_is)

◆ ClassicTrainingVectors() [3/3]

ClassicTrainingVectors::ClassicTrainingVectors ( const ClassicTrainingVectors &  op1)

Copy Constructor. Useful when returning a ClassicTrainingVectors Class. Parameter: op1 ClassicTrainingVectors

Definition at line 67 of file training_vector.cpp.

68 {
69 
70  calibrated(op1.calibrated());
71 
72  for (size_t i = 0; i < op1.size(); i++)
73  if (calibrated())
74  add(op1.itemAt(i), op1.targetAt(i));
75  else
76  add(op1.itemAt(i));
77 
78  normalized = op1.normalized;
79  varStats = op1.varStats;
80 }
const Target & targetAt(unsigned _i) const
Definition: training_set.h:221
#define i
std::vector< statsStruct > varStats
const Item & itemAt(unsigned _i) const
Definition: training_set.h:264
virtual void add(const FeatureVector &_i, const Label &_tg)
Definition: training_set.h:171

◆ ~ClassicTrainingVectors()

virtual ClassicTrainingVectors::~ClassicTrainingVectors ( )
inline virtual

Virtual destructor

Definition at line 85 of file training_vector.h.

86  {}

Member Function Documentation

◆ clear()

void ClassicTrainingVectors::clear ( )

Clears the training set

Definition at line 105 of file training_vector.cpp.

106 {
108  varStats.clear();
109  normalized = false;
110 }
std::vector< statsStruct > varStats

◆ dimension()

unsigned ClassicTrainingVectors::dimension ( ) const

Returns the dimension of the vectors (number of features)

Returns dimension (the same as above)

Definition at line 97 of file training_vector.cpp.

98 {
99  return itemAt(0).size();
100 }
const FeatureVector & itemAt(unsigned _i) const
Definition: training_set.h:264

◆ getFeatureStats()

void ClassicTrainingVectors::getFeatureStats ( unsigned  _i,
floatFeature &  _mean,
floatFeature &  _sd 
)

Calculates the average and SD of a feature in the training set. Parameter: _i The index to the feature

Returns a const reference to the normalization vector. Calculates the average and SD of a feature in the training set. Parameter: _i The index to the feature

Definition at line 505 of file training_vector.cpp.

506 {
507  using namespace std;
508 
509  // Do some validation
510  if (_i > itemAt(0).size())
511  {
512  std::ostringstream msg;
513  msg << "Out of range. No variable at position " << _i;
514  throw std::out_of_range(msg.str());
515  }
516 
517  // first calculates the mean
518  _mean = 0;
519  int nn = 0;
520  for (size_t it = 0; it < size(); it++)
521  {
522  if (!isnan(itemAt(it)[_i]))
523  {
524  _mean += itemAt(it)[_i];
525  nn++;
526  }
527 
528  }
529  _mean /= (floatFeature) nn;
530 
531  // Then calculates SD
532  _sd = 0;
533  for (size_t it = 0; it < size(); it++)
534  {
535  if (!isnan(itemAt(it)[_i]))
536  _sd += (itemAt(it)[_i] - _mean) * (itemAt(it)[_i] - _mean);
537  }
538  _sd = sqrt(_sd / (floatFeature)(nn - 1));
539 }
void sqrt(Image< double > &op)
float floatFeature
Definition: data_types.h:72
const FeatureVector & itemAt(unsigned _i) const
Definition: training_set.h:264

◆ getNormalizationInfo()

virtual const std::vector<statsStruct>& ClassicTrainingVectors::getNormalizationInfo ( ) const
inline virtual

Returns a const reference to the normalization vector

Definition at line 205 of file training_vector.h.

206  {
207  return varStats;
208  };
std::vector< statsStruct > varStats

◆ isNormalized()

bool ClassicTrainingVectors::isNormalized ( ) const

Returns TRUE if recordset is normalized.

Definition at line 487 of file training_vector.cpp.

488 {
489  return normalized;
490 }

◆ loadObject()

void ClassicTrainingVectors::loadObject ( std::istream &  _is)
virtual

Loads the class from a stream. This method can be used to load the status of the class. Parameter: _is The input stream

Reimplemented from ClassificationTrainingSet< FeatureVector, Label >.

Definition at line 252 of file training_vector.cpp.

253 {
254  clear();
255  int dim;
256  _is >> dim;
257  _is >> normalized;
258  if (normalized)
259  varStats.clear();
260  varStats.resize(dim);
261  for (size_t i = 0; i < varStats.size(); i++)
262  {
263  _is >> varStats[i].mean;
264  _is >> varStats[i].sd;
265  }
267 }
#define i
std::vector< statsStruct > varStats
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
virtual void loadObject(std::istream &_is)
Definition: training_set.h:360

◆ normalize()

void ClassicTrainingVectors::normalize ( )

Normalize all features in the training set

Definition at line 417 of file training_vector.cpp.

418 {
419  varStats.clear();
420  varStats.resize(itemAt(0).size());
421  for (unsigned i = 0; i < itemAt(0).size(); i++)
423  normalized = true;
424 }
#define i
std::vector< statsStruct > varStats
virtual void normalizeFeature(unsigned _i)
const FeatureVector & itemAt(unsigned _i) const
Definition: training_set.h:264

◆ normalizeFeature()

void ClassicTrainingVectors::normalizeFeature ( unsigned  _i)
virtual

Normalize a feature in the training set Parameter: _i The index to the feature

Normalize all features in the training set Parameter: _i The index to the feature

Definition at line 362 of file training_vector.cpp.

363 {
364  using namespace std;
365 
366  // Do some validation
367 
368  if (_i > itemAt(0).size())
369  {
370  std::ostringstream msg;
371  msg << "Out of range. No variable at position " << _i;
372  throw std::out_of_range(msg.str());
373  }
374 
375  // first calculates the mean
376  floatFeature mean = 0;
377  int nn = 0;
378  for (size_t it = 0; it < size(); it++)
379  {
380  if (!isnan(itemAt(it)[_i]))
381  {
382  mean += itemAt(it)[_i];
383  nn++;
384  }
385 
386  }
387  mean /= (floatFeature) nn;
388 
389  // Then calculates SD
390  floatFeature sd = 0;
391  for (size_t it = 0; it < size(); it++)
392  {
393  if (!isnan(itemAt(it)[_i]))
394  sd += (itemAt(it)[_i] - mean) * (itemAt(it)[_i] - mean);
395  }
396  sd = sqrt(sd / (floatFeature)(nn - 1));
397 
398  // Now normalize the variable
399  if (sd != 0)
400  {
401  for (size_t it = 0; it < size(); it++)
402  {
403  if (!isnan(itemAt(it)[_i]))
404  itemAt(it)[_i] = (itemAt(it)[_i] - mean) / sd;
405  }
406  }
407 
408  varStats[_i].mean = mean;
409  varStats[_i].sd = sd;
410 }
void sqrt(Image< double > &op)
float floatFeature
Definition: data_types.h:72
std::vector< statsStruct > varStats
const FeatureVector & itemAt(unsigned _i) const
Definition: training_set.h:264

◆ operator=()

ClassicTrainingVectors & ClassicTrainingVectors::operator= ( const ClassicTrainingVectors &  op1)

Operator "=" Parameter: op1 ClassicTrainingVectors

Definition at line 287 of file training_vector.cpp.

288 {
289 
290  // This avoids memory leakage in assignments like v=v
291  if (&op1 != this)
292  {
293 
294  calibrated(op1.calibrated());
295 
296  for (size_t i = 0; i < op1.size(); i++)
297  if (calibrated())
298  add(op1.itemAt(i), op1.targetAt(i));
299  else
300  add(op1.itemAt(i));
301 
302  normalized = op1.normalized;
303  varStats = op1.varStats;
304  }
305  return *this;
306 }
const Target & targetAt(unsigned _i) const
Definition: training_set.h:221
#define i
std::vector< statsStruct > varStats
const Item & itemAt(unsigned _i) const
Definition: training_set.h:264
virtual void add(const FeatureVector &_i, const Label &_tg)
Definition: training_set.h:171

◆ printSelf()

void ClassicTrainingVectors::printSelf ( std::ostream &  _os) const
virtual

Standard output for a training set Parameter: _os The output stream

Standard output for a training set Parameter: _os The output stream Parameter: _ts The training set to be printed

Reimplemented from ClassificationTrainingSet< FeatureVector, Label >.

Definition at line 117 of file training_vector.cpp.

118 {
119  _os << dimension() << " " << theItems.size() << std::endl;
121 }
virtual void printSelf(std::ostream &_os) const
Definition: training_set.h:330
unsigned dimension() const

◆ read()

void ClassicTrainingVectors::read ( const FileName &  fnIn)

Read data from a metadata.

Definition at line 182 of file training_vector.cpp.

183 {
184  clear();
185 
186  // Read header and content
187  MetaDataVec vectorHeader(formatString("vectorHeader@%s",fnIn.c_str()));
188  MetaDataVec vectorContent(formatString("vectorContent@%s",fnIn.c_str()));
189  size_t Nvectors;
190  size_t vectorSize;
191  size_t id = vectorHeader.firstRowId();
192  vectorHeader.getValue(MDL_CLASSIFICATION_DATA_SIZE,vectorSize,id);
193  vectorHeader.getValue(MDL_COUNT,Nvectors,id);
194  theItems.reserve(Nvectors);
195  theTargets.reserve(Nvectors);
196 
197  // Read the data
198  FileName fnInRaw=formatString("%s.vec",fnIn.withoutExtension().c_str());
199  std::ifstream fhInRaw(fnInRaw.c_str(),std::ios::binary);
200  if (!fhInRaw)
201  REPORT_ERROR(ERR_IO_NOTEXIST,fnInRaw);
202  std::vector<floatFeature> v;
203  v.resize(vectorSize);
204  auto *buffer=new float[vectorSize];
205  String fnImg;
206  size_t order;
207 
208  for (size_t objId : vectorContent.ids())
209  {
210  vectorContent.getValue(MDL_IMAGE, fnImg, objId);
211  vectorContent.getValue(MDL_ORDER, order, objId);
212 
213  // Read raw values
214  fhInRaw.seekg(order*vectorSize*sizeof(float));
215  fhInRaw.read((char*)buffer,vectorSize*sizeof(float));
216  if (!fhInRaw)
218  formatString("Could not read image %lu from %s",
219  order,fnInRaw.c_str()));
220  for (size_t i=0; i<vectorSize; ++i)
221  v[i]=buffer[i];
222  theTargets.push_back(fnImg);
223  theItems.push_back(v);
224  }
225  delete []buffer;
226 }
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
HBITMAP buffer
Definition: svm-toy.cpp:37
#define i
Number of elements of a type (int) [this is a generic type; do not use to transfer information to ano...
Size of data vectors for classification (int)
File or directory does not exist.
Definition: xmipp_error.h:136
Couldn't read from file.
Definition: xmipp_error.h:139
FileName withoutExtension() const
std::string String
Definition: xmipp_strings.h:34
int order
Derivation order and Bessel function order.
Definition: blobs.h:118
String formatString(const char *format,...)
Name of an image (std::string)

◆ readSelf()

void ClassicTrainingVectors::readSelf ( std::istream &  _is)
virtual

Standard input for a training set Parameter: _is The input stream

Exceptions
runtime_error If there are problems with the stream

Standard input for a training set Parameter: _is The input stream Parameter: _ts The training set to be read

Exceptions
runtime_error If there are problems with the stream

Reimplemented from ClassificationTrainingSet< FeatureVector, Label >.

Definition at line 129 of file training_vector.cpp.

130 {
131 #ifndef _NO_EXCEPTION
132  try
133  {
134 #endif
135  clear();
136  std::string line;
137 
138  // Determines the number of rows and columns in the training set
139 
140  long dim;
141  long size;
142  _is >> dim;
143  _is >> line;
144  if (!sscanf(line.c_str(), "%ld", &size))
145  {
146  int x;
147  int y;
148  _is >> x;
149  _is >> y;
150  size = x * y;
151  }
152  getline(_is, line);
153  theItems.resize(size);
154  theTargets.resize(size);
155 
156  for (int i = 0; i < size; i++)
157  {
158  std::vector<floatFeature> v;
159  v.resize(dim);
160  for (int j = 0; j < dim; j++)
161  {
162  floatFeature var;
163  _is >> var;
164  v[j] = var;
165  }
166  getline(_is, line);
167  theItems[i] = v;
168  theTargets[i] = removeSpaces(line);
169  }
170 
171 #ifndef _NO_EXCEPTION
172  }
173  catch (std::exception& e)
174  {
175  std::ostringstream msg;
176  msg << e.what() << std::endl << "Error reading the training set";
177  throw std::runtime_error(msg.str());
178  }
179 #endif
180 }
String removeSpaces(const String &_str)
static double * y
float floatFeature
Definition: data_types.h:72
doublereal * x
#define i
#define j

◆ saveObject()

void ClassicTrainingVectors::saveObject ( std::ostream &  _os) const
virtual

Saves the class into a stream. This method can be used to save the status of the class. Parameter: _os The output stream

Reimplemented from ClassificationTrainingSet< FeatureVector, Label >.

Definition at line 233 of file training_vector.cpp.

234 {
235  _os << dimension() << std::endl;
236  _os << normalized << std::endl;
237  if (normalized)
238  for (size_t i = 0; i < varStats.size(); i++)
239  {
240  _os << varStats[i].mean << std::endl;
241  _os << varStats[i].sd << std::endl;
242  }
244 }
#define i
std::vector< statsStruct > varStats
unsigned dimension() const
virtual void saveObject(std::ostream &_os) const
Definition: training_set.h:349

◆ unNormalize()

void ClassicTrainingVectors::unNormalize ( )
virtual

UnNormalize all features in the training set

Definition at line 431 of file training_vector.cpp.

432 {
433  using namespace std;
434  for (unsigned it = 0; it < size(); it++)
435  {
436  for (unsigned i = 0; i < itemAt(0).size(); i++)
437  {
438  if (!isnan(itemAt(it)[i]))
439  itemAt(it)[i] = itemAt(it)[i] * varStats[i].sd + varStats[i].mean;
440  }
441  }
442  varStats.clear();
443  normalized = false;
444 }
#define i
std::vector< statsStruct > varStats
const FeatureVector & itemAt(unsigned _i) const
Definition: training_set.h:264

Member Data Documentation

◆ normalized

bool ClassicTrainingVectors::normalized
protected

Definition at line 226 of file training_vector.h.

◆ varStats

std::vector<statsStruct> ClassicTrainingVectors::varStats
protected

Definition at line 225 of file training_vector.h.


The documentation for this class was generated from the following files: