Xmipp  v3.23.11-Nereus
classify_kerdensom.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Alberto Pascual Montano (pascual@cnb.csic.es)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include <fstream>
27 #include "core/xmipp_program.h"
29 #include "core/metadata_vec.h"
30 
31 /* Parameters class ======================================================= */
33 {
34 public:
35  /* Input Parameters ======================================================== */
36  FileName fn_in; // input file
37  FileName fn_root; // output file
38  FileName tmpN; // Temporary variable
39  double eps; // Stopping criteria
40  unsigned iter; // Iteration number
41  bool norm; // Normalize?
42  size_t xdim; // X-dimension (-->)
43  size_t ydim; // Y-dimension
44  double reg0; // Initial reg
45  double reg1; // Final reg
46  std::string layout; // layout (Topology)
47  unsigned annSteps; // Deterministic Annealing steps
48 public:
49  // Define parameters
50  void defineParams()
51  {
52  addUsageLine("Purpose: Kernel Density Estimator Self-Organizing Map");
53  addUsageLine("+KerDenSOM stands for Kernel Probability Density Estimator Self-Organizing Map.");
54  addUsageLine("+It maps a set of high dimensional input vectors into a two-dimensional grid. ");
55  addUsageLine("+For more information, please see the following [[http://www.ncbi.nlm.nih.gov/pubmed/11472094][reference]].");
56  addUsageLine("+");
57  addUsageLine("+The topology of the network can be hexagonal or rectangular (see below). ");
58  addUsageLine("+It is advised to design maps with one of its sides larger than the other (e.g. 10x5).");
59  addUsageLine("+ Xdim is ------>",true);
60  addUsageLine("+ HEXAGONAL:",true);
61  addUsageLine("+ O O O O O O O O O",true);
62  addUsageLine("+O O O & & & O O O",true);
63  addUsageLine("+ O O & @ @ & O O O",true);
64  addUsageLine("+O O & @ + @ & O O",true);
65  addUsageLine("+ O O & @ @ & O O O",true);
66  addUsageLine("+O O O & & & O O O",true);
67  addUsageLine("+ O O O O O O O O O",true);
68  addUsageLine("+ RECTANGULAR:",true);
69  addUsageLine("+O O O O O O O O O",true);
70  addUsageLine("+0 O O O & O O O O",true);
71  addUsageLine("+O O O & @ & O O O",true);
72  addUsageLine("+O O & @ + @ & O O",true);
73  addUsageLine("+O O O & @ & O O O",true);
74  addUsageLine("+O O O O & O O O O",true);
75  addUsageLine("+O O O O O O O O O",true);
76  addSeeAlsoLine("image_vectorize");
77  addParamsLine(" -i <file_in> : Input data file");
78  addParamsLine(" : This file is generated by [[image_vectorize_v3][image_vectorize]]");
79  addParamsLine(" --oroot <rootname> : rootname_classes.xmd, rootname_images.xmd and rootname_vectors.xmd will be created");
80  addParamsLine(" : This file mst be read by [[image_vectorize_v3][image_vectorize]]");
81  addParamsLine(" [--xdim <Hdimension=10>] : Horizontal size of the map");
82  addParamsLine(" [--ydim <Vdimension=5>] : Vertical size of the map");
83  addParamsLine(" [--topology <topology=RECT>] : Lattice topology");
84  addParamsLine(" :+The following picture will help in understanding the differences ");
85  addParamsLine(" :+between both topologies and the map axis convention:");
86  addParamsLine(" where <topology> RECT HEXA");
87  addParamsLine(" [--deterministic_annealing <steps=10> <Initial_reg=1000> <Final_reg=100>] : Deterministic annealing");
88  addParamsLine(" : controls the smoothness regularization");
89  addParamsLine(" : Set it to 0 0 0 for Kernel C-means");
90  addParamsLine(" [--eps <epsilon=1e-7>] : Stopping criteria");
91  addParamsLine(" [--iter <N=200>] : Number of iterations");
92  addParamsLine(" [--norm] : Normalize input data");
93  addExampleLine("xmipp_image_vectorize -i images.stk -o vectors.xmd");
94  addExampleLine("xmipp_classify_kerdensom -i vectors.xmd -o kerdensom.xmd");
95  }
96 
97  // Read parameters
98  void readParams()
99  {
100  fn_in = getParam("-i");
101  if (checkParam("--oroot"))
102  fn_root=getParam("--oroot");
103  ydim = getIntParam("--ydim");
104  xdim = getIntParam("--xdim");
105  layout = getParam("--topology");
106  annSteps = getIntParam("--deterministic_annealing",0);
107  reg0 = getDoubleParam("--deterministic_annealing",1);
108  reg1 = getDoubleParam("--deterministic_annealing",2);
109  eps = getDoubleParam("--eps");
110  iter = getIntParam("--iter");
111  norm = checkParam("--norm");
112 
113  // Some checks
114  if (iter < 1)
115  REPORT_ERROR(ERR_ARG_INCORRECT,"iter must be > 1");
116 
117  if ((reg0 <= reg1) && (reg0 != 0) && (annSteps > 1))
118  REPORT_ERROR(ERR_ARG_INCORRECT,"reg0 must be > reg1");
119  if (reg0 == 0)
120  annSteps = 0;
121  if (reg0 < 0)
122  REPORT_ERROR(ERR_ARG_INCORRECT,"reg0 must be > 0");
123  if (reg1 < 0)
124  REPORT_ERROR(ERR_ARG_INCORRECT,"reg1 must be > 0");
125  if (xdim < 1)
126  REPORT_ERROR(ERR_ARG_INCORRECT,"xdim must be >= 1");
127  if (ydim < 1)
128  REPORT_ERROR(ERR_ARG_INCORRECT,"ydim must be >= 1");
129  }
130 
131  void show()
132  {
133  std::cout << "Input data file : " << fn_in << std::endl;
134  std::cout << "Output rootname : " << fn_root << std::endl;
135  std::cout << "Horizontal dimension (Xdim) = " << xdim << std::endl;
136  std::cout << "Vertical dimension (Ydim) = " << ydim << std::endl;
137  if (layout == "HEXA")
138  std::cout << "Hexagonal topology " << std::endl;
139  else
140  std::cout << "Rectangular topology " << std::endl;
141  std::cout << "Initial smoothness factor (reg0) = " << reg0 << std::endl;
142  std::cout << "Final smoothness factor (reg1) = " << reg1 << std::endl;
143  std::cout << "Deterministic annealing steps = " << annSteps << std::endl;
144  std::cout << "Total number of iterations = " << iter << std::endl;
145  std::cout << "Stopping criteria (eps) = " << eps << std::endl;
146  if (norm)
147  std::cout << "Normalize input data" << std::endl;
148  else
149  std::cout << "Do not normalize input data " << std::endl;
150  }
151 
152  // Run
153  void run()
154  {
155  FileName fnClasses=fn_root+"_classes.xmd";
157  dummy.write(formatString("classes@%s",fnClasses.c_str()));
158 
159  /* Open training vector ================================================= */
160  ClassicTrainingVectors ts(0, true);
161  std::cout << std::endl << "Reading input data file " << fn_in << "....." << std::endl;
162  ts.read(fn_in);
163 
164  /* Real stuff ============================================================== */
165  if (norm)
166  {
167  std::cout << "Normalizing....." << std::endl;
168  ts.normalize(); // Normalize input data
169  }
170 
171  auto *myMap = new FuzzyMap(layout, xdim, ydim, ts);
172 
173  KerDenSOM *thisSOM= new GaussianKerDenSOM(reg0, reg1, annSteps, eps, iter); // Creates KerDenSOM Algorithm
174 
175  TextualListener myListener; // Define the listener class
176  myListener.setVerbosity() = verbose; // Set verbosity level
177  thisSOM->setListener(&myListener); // Set Listener
178  thisSOM->train(*myMap, ts, fnClasses); // Train algorithm
179 
180  // Test algorithm
181  double dist = thisSOM->test(*myMap, ts);
182  std::cout << std::endl << "Quantization error : " << dist << std::endl;
183 
184  // Classifying
185  std::cout << "Classifying....." << std::endl;
186  myMap->classify(&ts);
187 
188  // Calibrating
189  std::cout << "Calibrating....." << std::endl;
190  myMap->calibrate(ts);
191 
192  /*******************************************************
193  Saving all kind of Information
194  *******************************************************/
195  // Save map size
196  MetaDataVec MDkerdensom;
197  size_t id=MDkerdensom.addObject();
198  MDkerdensom.setValue(MDL_XSIZE,xdim,id);
199  MDkerdensom.setValue(MDL_YSIZE,ydim,id);
200  MDkerdensom.setValue(MDL_MAPTOPOLOGY,layout,id);
201  MDkerdensom.write(formatString("KerDenSOM_Layout@%s",fnClasses.c_str()),MD_APPEND);
202 
203  // save intracluster distance and number of vectors in each cluster
204  MetaDataVec MDsummary;
205  for (unsigned i = 0; i < myMap->size(); i++)
206  {
207  id=MDsummary.addObject();
208  MDsummary.setValue(MDL_REF,(int)i+1,id);
209  MDsummary.setValue(MDL_CLASSIFICATION_INTRACLASS_DISTANCE,myMap->aveDistances[i],id);
210  MDsummary.setValue(MDL_CLASS_COUNT,(size_t)myMap->classifSizeAt(i),id);
211  }
212  MDsummary.write(formatString("classes@%s",fnClasses.c_str()),MD_APPEND);
213 
214  // assign data to clusters according to fuzzy threshold
215  std::cout << "Saving neurons assigments ....." << std::endl;
216  MetaDataVec vectorContentIn, MDimages;
217  vectorContentIn.read(formatString("vectorContent@%s",fn_in.c_str()));
218  std::vector<size_t> objIds;
219  vectorContentIn.findObjects(objIds);
220  for (unsigned i = 0; i < myMap->size(); i++)
221  {
222  MetaDataVec MD;
223  for (size_t j = 0; j < myMap->classifAt(i).size(); j++)
224  {
225  MDRowVec row;
226  size_t order=myMap->classifAt(i)[j];
227  vectorContentIn.getRow(row,objIds[order]);
228  row.setValue(MDL_REF,(int) i+1);
229  MDimages.addRow(row);
230  MD.addRow(row);
231  }
232  if (MD.size()>0)
233  {
235  MD.write(formatString("class%06d_images@%s",i+1,fnClasses.c_str()),MD_APPEND);
236  }
237  }
238  MDimages.removeLabel(MDL_ORDER);
239  MDimages.write((String)"images@"+fn_root+"_images.xmd");
240 
241  // Save code vectors
242  if (norm)
243  {
244  std::cout << "Denormalizing code vectors....." << std::endl;
245  myMap->unNormalize(ts.getNormalizationInfo()); // de-normalize codevectors
246  }
247  MetaDataVec vectorHeaderIn, vectorHeaderOut, vectorContentOut;
248  vectorHeaderIn.read(formatString("vectorHeader@%s",fn_in.c_str()));
249  vectorHeaderOut.setColumnFormat(false);
250  size_t size, vectorSize;
251  size_t idIn=vectorHeaderIn.firstRowId();
252  size_t idOut=vectorHeaderOut.addObject();
253  vectorHeaderIn.getValue(MDL_XSIZE,size,idIn);
254  vectorHeaderOut.setValue(MDL_XSIZE,size,idOut);
255  vectorHeaderIn.getValue(MDL_YSIZE,size,idIn);
256  vectorHeaderOut.setValue(MDL_YSIZE,size,idOut);
257  vectorHeaderIn.getValue(MDL_ZSIZE,size,idIn);
258  vectorHeaderOut.setValue(MDL_ZSIZE,size,idOut);
259  vectorHeaderOut.setValue(MDL_COUNT,myMap->size(),idOut);
260  vectorHeaderIn.getValue(MDL_CLASSIFICATION_DATA_SIZE,vectorSize,idIn);
261  vectorHeaderOut.setValue(MDL_CLASSIFICATION_DATA_SIZE,vectorSize,idOut);
262  vectorHeaderOut.write(formatString("vectorHeader@%s_vectors.xmd",fn_root.c_str()),MD_APPEND);
263  FileName fnVectorsRaw=fn_root+"_vectors.vec";
264  std::ofstream fhVectorsRaw(fnVectorsRaw.c_str(),std::ios::binary);
265  if (!fhVectorsRaw)
266  REPORT_ERROR(ERR_IO_NOWRITE,fnVectorsRaw);
267  for (size_t i = 0; i < myMap->size(); i++)
268  {
269  id=vectorContentOut.addObject();
270  vectorContentOut.setValue(MDL_REF,(int)i+1,id);
271  vectorContentOut.setValue(MDL_ORDER,i,id);
272  fhVectorsRaw.write((char*)&(myMap->theItems[i][0]),vectorSize*sizeof(float));
273  }
274  fhVectorsRaw.close();
275  vectorContentOut.write(formatString("vectorContent@%s_vectors.xmd",fn_root.c_str()),MD_APPEND);
276  delete myMap;
277  delete thisSOM;
278  }
279 };
Z size (int)
double getDoubleParam(const char *param, int arg=0)
Definition: map.h:308
void read(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=nullptr, bool decomposeStack=true) override
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
void setListener(BaseListener *_listener)
bool removeLabel(const MDLabel label) override
Couldn&#39;t write to file.
Definition: xmipp_error.h:140
void setValue(const MDObject &object) override
void read(const FileName &fnIn)
void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const
virtual double test(const FuzzyMap &_som, const TS &_examples) const
Definition: kerdensom.cpp:74
std::unique_ptr< MDRow > getRow(size_t id) override
virtual void train(FuzzyMap &_som, TS &_examples, FileName &_fn_vectors, bool _update=false, double _sigma=0, bool _saveIntermediate=false)=0
size_t size() const override
#define i
size_t addRow(const MDRow &row) override
void addSeeAlsoLine(const char *seeAlso)
Y size (int)
Number of elements of a type (int) [this is a genereic type do not use to transfer information to ano...
const char * getParam(const char *param, int arg=0)
bool setValue(const MDObject &mdValueIn, size_t id)
size_t addObject() override
virtual unsigned & setVerbosity()
Definition: xmipp_funcs.h:1072
Incorrect argument received.
Definition: xmipp_error.h:113
size_t firstRowId() const override
void findObjects(std::vector< size_t > &objectsOut, const MDQuery &query) const override
Size of data vectors for classification (int)
void addExampleLine(const char *example, bool verbatim=true)
int verbose
Verbosity level.
double dummy
#define j
bool getValue(MDObject &mdValueOut, size_t id) const override
Class to which the image belongs (int)
virtual void setColumnFormat(bool column)
Magnification of microscope.
X size (int)
Number of images assigned to the same class as this image.
std::string String
Definition: xmipp_strings.h:34
String formatString(const char *format,...)
virtual const std::vector< statsStruct > & getNormalizationInfo() const
bool checkParam(const char *param)
Average intraclass distance (double)
void addUsageLine(const char *line, bool verbatim=false)
int getIntParam(const char *param, int arg=0)
void addParamsLine(const String &line)