Xmipp v3.23.11-Nereus
gaussian_kerdensom.cpp
1 /***************************************************************************
2  *
3  * Authors: Alberto Pascual Montano (pascual@cnb.csic.es)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia, CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 //-----------------------------------------------------------------------------
27 // gaussian_kerdensom.cpp
28 // Implements Smoothly Distributed Kernel Probability Density Estimator Self-Organizing Map
29 // Uses a Gaussian Kernel Function.
30 //-----------------------------------------------------------------------------
31 
32 #include <fstream>
33 #include <ctime>
34 
35 #include "gaussian_kerdensom.h"
36 #include <core/metadata_vec.h>
37 
38 constexpr signed int MAXZ = -11282;
39 
40 //-----------------------------------------------------------------------------
48 void GaussianKerDenSOM::train(FuzzyMap& _som, TS& _examples, FileName& _fn,
49  bool _update, double _sigma, bool _saveIntermediate)
50 {
51  MetaDataVec MDconvergence;
52  FileName tmpN;
53  numNeurons = _som.size();
54  numVectors = _examples.size();
55  dim = _examples.theItems[0].size();
56  tmpV.resize(dim, 0.);
57  tmpDens.resize(numNeurons, 0.);
58  tmpMap.resize(numNeurons);
59  for (size_t i = 0; i < numNeurons; i++)
60  tmpMap[i].resize(dim, 0.);
61  tmpD.resize(numNeurons);
62  tmpD1.resize(numNeurons);
63  double stopError;
64 
65  int verbosity = listener->getVerbosity();
66  if (verbosity)
67  listener->OnReportOperation((std::string) "\nTraining KerDenSOM....\n");
68 
69  // Initialization
70  if (verbosity)
71  listener->OnReportOperation((std::string) "\nInitializing....\n");
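 // Fresh training initializes the membership matrix (initU) and the code
 // vectors (updateV1) and estimates an initial sigma; when updating an
 // existing map the supplied sigma is reused (re-estimated from the data
 // only if _sigma == 0) and the memberships are recomputed with updateU.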
72  if (!_update)
73  {
74  initU(&_som);
75  updateV1(&_som, &_examples);
76  sigma = updateSigmaI(&_som, &_examples);
77  }
78  else
79  {
80  double alpha;
81  sigma = _sigma;
82  if (_sigma == 0.0)
83  {
84  updateU1(&_som, &_examples);
85  sigma = updateSigmaI(&_som, &_examples);
86  }
87  updateU(&_som, &_examples, sigma, alpha);
88  }
89 
90 
91  double regtmp;
92  double tmpregMax;
93  double tmpregMin;
94  double pen;
95  double lkhood;
96  tmpregMax = log(reg0);
97  tmpregMin = log(reg1);
98 
99  if (annSteps == 0)
100  annSteps = 1;
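 // Deterministic annealing schedule: the regularization factor interpolates
 // log-linearly from reg0 (first step) to reg1 (last step),
 //   reg(iter) = exp(log(reg0) - iter * (log(reg0) - log(reg1)) / (annSteps - 1))
 // With a single annealing step reg1 is used directly.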
101  for (size_t iter = 0; iter < annSteps; iter++)
102  {
103 
104  if (verbosity)
105  {
106  if (annSteps > 1)
107  {
108  char s[100];
109  sprintf(s, "\nTraining Deterministic Annealing step %d of %d....\n", (int)(iter + 1), (int)annSteps);
110  listener->OnReportOperation((std::string) s);
111  }
112  else
113  listener->OnReportOperation((std::string) "Training ....\n");
114  }
115 
116  if (annSteps == 1)
117  regtmp = reg1;
118  else
119  regtmp = exp(tmpregMax - iter * (tmpregMax - tmpregMin) / (annSteps - 1));
120  stopError = mainIterations(&_som, &_examples, sigma, regtmp);
121  if (verbosity)
122  listener->OnReportOperation((std::string) "Calculating cost function....\n");
123  double funct = functional(&_examples, &_som, sigma, reg1, lkhood, pen);
124 
125  size_t id=MDconvergence.addObject();
126  MDconvergence.setValue(MDL_KERDENSOM_REGULARIZATION,regtmp,id);
127  MDconvergence.setValue(MDL_KERDENSOM_FUNCTIONAL,funct,id);
128  MDconvergence.setValue(MDL_KERDENSOM_SIGMA,sigma,id);
129 
130  if (verbosity)
131  {
132  char s[100];
133  sprintf(s, "Code vectors variation: %g\n", stopError);
134  listener->OnReportOperation((std::string) s);
135  }
136 
137 
138  if (annSteps > 1)
139  {
140  // Classifying
141  if (verbosity)
142  listener->OnReportOperation((std::string) "Classifying....\n");
143  _som.classify(&_examples);
144 
145  // Calibrating
146  if (verbosity)
147  listener->OnReportOperation((std::string) "Calibrating....\n");
148  _som.calibrate(_examples);
149 
150  if (_examples.isNormalized())
151  {
152  if (verbosity)
153  listener->OnReportOperation((std::string) "Denormalizing code vectors....\n");
154  _som.unNormalize(_examples.getNormalizationInfo()); // de-normalize codevectors
155  }
156 
157  if (_saveIntermediate)
158  {
159  // Saves each codebook (for all iterations)
160  tmpN = _fn.c_str() + (std::string) "_" + integerToString(iter) + (std::string) ".cod";
161  if (verbosity)
162  listener->OnReportOperation((std::string) "Saving code vectors....\n");
163  std::ofstream codS(tmpN.c_str());
164  codS << _som;
165  codS.flush();
166 
167  // save .his file (Histogram)
168  if (verbosity)
169  listener->OnReportOperation((std::string) "Saving histogram file....\n");
170  tmpN = _fn.c_str() + (std::string) "_" + integerToString(iter) + (std::string) ".his";
171  std::ofstream hisStream(tmpN.c_str());
172  _som.printHistogram(hisStream);
173  hisStream.flush();
174 
175  // save .err file (Average Quantization Error)
176  if (verbosity)
177  listener->OnReportOperation((std::string) "Saving Average Quantization Error file....\n");
178  tmpN = _fn.c_str() + (std::string) "_" + integerToString(iter) + (std::string) ".err";
179  std::ofstream errStream(tmpN.c_str());
180  _som.printQuantError(errStream);
181  errStream.flush();
182 
183  // save .vs file to be compatible with SOM_PAK
184  if (verbosity)
185  listener->OnReportOperation((std::string) "Saving visual file....\n");
186  tmpN = _fn.c_str() + (std::string) "_" + integerToString(iter) + (std::string) ".vs";
187  std::ofstream vsStream(tmpN.c_str());
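 // SOM_PAK-style .vs header: <dim> <topology> <xdim> <ydim> <kernel>, then one
 // line per input vector: <x> <y> <fuzzy membership of its winner> <label>.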
188  vsStream << _examples.theItems[0].size() << " " << _som.layout() << " " << _som.width() << " " << _som.height() << " gaussian" << std::endl;
189  for (size_t i = 0; i < _examples.size(); i++)
190  {
191  int j = _som.fuzzyWinner(i);
192  vsStream << _som.indexToPos(j).first << " " << _som.indexToPos(j).second << " " << _som.memb[i][j] << " " << _examples.theTargets[i] << std::endl;
193  }
194  vsStream.flush();
195 
196  // save .inf file
197  if (verbosity)
198  listener->OnReportOperation((std::string) "Saving inf file....\n");
199  tmpN = _fn.c_str() + (std::string) "_" + integerToString(iter) + (std::string) ".inf";
200  std::ofstream infS(tmpN.c_str());
201  infS << "Kernel Probability Density Estimator SOM algorithm" << std::endl << std::endl;
202  infS << "Deterministic annealing step " << iter + 1 << " out of " << annSteps << std::endl;
203  infS << "Number of feature vectors: " << _examples.size() << std::endl;
204  infS << "Number of variables: " << _examples.theItems[0].size() << std::endl;
205  infS << "Horizontal dimension (Xdim) = " << _som.width() << std::endl;
206  infS << "Vertical dimension (Ydim) = " << _som.height() << std::endl;
207  if (_examples.isNormalized())
208  infS << "Input data normalized" << std::endl;
209  else
210  infS << "Input data not normalized" << std::endl;
211  if (_som.layout() == "rect")
212  infS << "Rectangular topology " << std::endl;
213  else
214  infS << "Hexagonal topology " << std::endl;
215  infS << "Gaussian Kernel function " << std::endl;
216  infS << "Total number of iterations = " << somNSteps << std::endl;
217  infS << "Stopping criteria (eps) = " << epsilon << std::endl << std::endl ;
218 
219  infS << "Smoothness factor (regularization) = " << regtmp << std::endl;
220  infS << "Functional value = " << funct << std::endl;
221  infS << "Sigma (Kernel width) = " << sigma << std::endl;
222  infS.flush();
223  }
224 
225  if (_examples.isNormalized())
226  {
227  if (verbosity)
228  listener->OnReportOperation((std::string) "Normalizing code vectors....\n");
229  _som.Normalize(_examples.getNormalizationInfo()); // normalize code vectors
230  }
231  } // if annSteps
232 
233  } // for
234  MDconvergence.write(formatString("KerDenSOM_Convergence@%s",_fn.c_str()),MD_APPEND);
235 
236  tmpV.clear();
237  tmpDens.clear();
238  tmpMap.clear();
239  tmpD.clear();
240  tmpD1.clear();
241 
242 }
243 
244 //-----------------------------------------------------------------------------
248 double GaussianKerDenSOM::updateU(FuzzyMap* _som, const TS* _examples,
249  const double& _sigma, double& _alpha)
250 {
251  // Create auxiliary variables
252  double auxDist;
253  double rr2;
254  double max1;
255  double d1;
256  double tmp;
257  double r1;
258 
259  double irr1 = 1.0 / (2.0 * _sigma);
260  _alpha = 0;
261 
262  // Update Membership matrix
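 // For each sample k the membership to neuron i is a softmax over the
 // dimension-normalized squared distances d_ki = ||x_k - m_i||^2 / dim:
 //   u_ki = exp(-d_ki / (2*sigma)) / sum_j exp(-d_kj / (2*sigma))
 // The largest exponent is subtracted before exponentiating for numerical
 // stability, and _alpha accumulates sum_k sum_i u_ki * d_ki, which is used
 // when sigma is re-estimated (see updateSigmaII).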
263  double *ptrTmpD=&tmpD[0];
264  double *ptrTmpD1=&tmpD1[0];
265  double idim=1.0/dim;
266  for (size_t k = 0; k < numVectors; k++)
267  {
268  max1 = -MAXFLOAT;
269  const floatFeature *ptrExample=&(_examples->theItems[k][0]);
270  for (size_t i = 0; i < numNeurons; i ++)
271  {
272  auxDist = 0;
273  const floatFeature *ptrCodeVector=&(_som->theItems[i][0]);
274  for (size_t j = 0; j < dim; j++)
275  {
276  double tmp=((double)ptrExample[j] - (double)ptrCodeVector[j]);
277  auxDist += tmp * tmp;
278  }
279  auxDist*=idim;
280  ptrTmpD[i] = auxDist;
281  rr2 = -auxDist * irr1;
282  ptrTmpD1[i] = rr2;
283  if (max1 < rr2)
284  max1 = rr2;
285  }
286  r1 = 0;
287  for (size_t j = 0; j < numNeurons; j ++)
288  {
289  rr2 = ptrTmpD1[j] - max1;
290  if (rr2 < MAXZ)
291  d1 = 0;
292  else
293  d1 = (double)exp(rr2);
294  r1 += d1;
295  ptrTmpD1[j] = d1;
296  }
297  double ir1=1.0/r1;
298 
299  floatFeature *ptrSomMembK=&(_som->memb[k][0]);
300  for (size_t j = 0; j < numNeurons; j ++)
301  {
302  tmp = ptrTmpD1[j] * ir1;
303  ptrSomMembK[j] = (floatFeature) tmp;
304  _alpha += tmp * ptrTmpD[j];
305  }
306  } // for k
307  return 0.0;
308 }
309 
310 //-----------------------------------------------------------------------------
311 
312 // Estimate Sigma part II
313 double GaussianKerDenSOM::updateSigmaII(FuzzyMap* _som, const TS* _examples, const double& _reg, const double& _alpha)
314 {
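 // Re-estimates the kernel width as
 //   sigma = (alpha + reg * q) / (numVectors * dim)
 // where alpha is the membership-weighted distance term accumulated in updateU
 // and q is the smoothness term computed below from the local neighbourhood
 // averages; with reg == 0 only the alpha term remains.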
315  size_t cc;
316  size_t j;
317 
318  if (_reg == 0)
319  return(_alpha / (double)(numVectors*dim));
320 
321  // Computing Sigma (Part II)
322  double q = 0.;
323  for (cc = 0; cc < numNeurons; cc++)
324  {
325  _som->localAve(_som->indexToPos(cc), tmpV);
326  auto t1 = (double) _som->getLayout().numNeig(_som, (SomPos) _som->indexToPos(cc));
327  for (j = 0; j < dim; j++)
328  q += (t1 * ((double)_som->theItems[cc][j]) - (double)(tmpV[j])) * ((double)_som->theItems[cc][j]);
329  }
330  return (double)((_alpha + _reg*q) / (double)(numVectors*dim));
331 }
332 
333 
334 //-----------------------------------------------------------------------------
335 
339 double GaussianKerDenSOM::codeDens(const FuzzyMap* _som, const FeatureVector* _example, double _sigma) const
340 {
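 // Gaussian kernel density estimate of the example under the current code
 // vectors:
 //   p(x) = (2*pi*sigma)^(-dim/2) * (1/numNeurons) * sum_c exp(-||x - m_c||^2 / (2*sigma))
 // Exponents below MAXZ are clamped to zero to avoid underflow.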
341  double s = 0;
342  double K=-1.0/(2*_sigma);
343  const floatFeature *ptrExample=&((*_example)[0]);
344  for (size_t cc = 0; cc < numNeurons; cc++)
345  {
346  double t = 0;
347  const floatFeature *ptrCodeVector=&(_som->theItems[cc][0]);
348  for (size_t j = 0; j < dim; j++)
349  {
350  double diff=(double)(ptrExample[j]) - (double)(ptrCodeVector[j]);
351  t += diff*diff;
352  }
353  t *= K;
354  if (t < MAXZ)
355  t = 0;
356  else
357  t = exp(t);
358  s += t;
359  }
360  return std::pow(2*PI*_sigma, -0.5*dim)*s / numNeurons;
361 }
362 
363 //-----------------------------------------------------------------------------
364 #ifdef UNUSED // detected as unused 29.6.2018
365 
368 double GaussianKerDenSOM::dataDens(const TS* _examples, const FeatureVector* _example, double _sigma) const
369 {
370  unsigned j, vv;
371  double t = 0, s = 0;
372  for (vv = 0; vv < numVectors; vv++)
373  {
374  t = 0;
375  for (j = 0; j < dim; j++)
376  t += ((double)((*_example)[j]) - (double)(_examples->theItems[vv][j])) * ((double)((*_example)[j]) - (double)(_examples->theItems[vv][j]));
377  t = -t / (2.0 * _sigma);
378  if (t < MAXZ)
379  t = 0;
380  else
381  t = exp(t);
382  s += t;
383  }
384  double tmp = (double) dim / 2.0;
385  return (double)(pow((double)(2*PI*_sigma), -tmp)*s / (double)numVectors);
386 }
387 #endif
388 
389 //-----------------------------------------------------------------------------
390 
395 double GaussianKerDenSOM::functional(const TS* _examples, const FuzzyMap* _som,
396  double _sigma, double _reg, double& _likelihood,
397  double& _penalty)
398 {
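 // Cost function reported during training: negative log-likelihood of the data
 // under the kernel density estimate plus a smoothness penalty on the map,
 //   E = -sum_v log p(x_v) + reg/(2*sigma) * sum_c sum_j (t_c*m_cj - a_cj) * m_cj
 // where t_c is the number of neighbours of code vector c and a_c its local
 // neighbourhood average.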
399  unsigned j;
400  unsigned vv;
401  unsigned cc;
402  double t;
403  _likelihood = 0;
404  for (vv = 0; vv < numVectors; vv++)
405  {
406  t = codeDens(_som, &(_examples->theItems[vv]), _sigma);
407  if (t == 0)
408  {
409  t = 1e-300;
410  }
411  _likelihood += log(t);
412  }
413  _likelihood = -_likelihood;
414  _penalty = 0;
415 
416  if (_reg != 0)
417  {
418  for (cc = 0; cc < numNeurons; cc++)
419  {
420  _som->localAve(_som->indexToPos(cc), tmpV);
421  t = _som->getLayout().numNeig(_som, (SomPos) _som->indexToPos(cc));
422  for (j = 0; j < dim; j++)
423  {
424  _penalty += (t * (double)(_som->theItems[cc][j]) -
425  (double)(tmpV[j])) * (double)(_som->theItems[cc][j]);
426  }
427  }
428  _penalty = _reg * _penalty / (2.0 * _sigma);
429  }
430  return (_likelihood + _penalty);
431 }
432 
433 //-----------------------------------------------------------------------------