Xmipp  v3.23.11-Nereus
mpi_classify_CL2D_core_analysis.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Carlos Oscar Sanchez Sorzano coss@cnb.csic.es (2011)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
28 #include "core/matrix2d.h"
29 
30 // Show block ==============================================================
31 std::ostream & operator << (std::ostream &out, const CL2DBlock &block)
32 {
33  out << block.block << "@" << block.fnLevel << " -> level " << block.level << std::endl;
34  return out;
35 }
36 
37 // Empty constructor =======================================================
39 {
40  node=std::make_shared<MpiNode>(argc, argv);
41  if (!node->isMaster())
42  verbose=0;
43  taskDistributor=nullptr;
44  maxLevel=-1;
45  tolerance=0;
46  thPCAZscore=3;
47 }
48 
49 // Read arguments ==========================================================
51 {
52  fnRoot = getParam("--root");
53  fnODir = getParam("--dir");
54  if (checkParam("--computeCore"))
55  {
56  thPCAZscore = getDoubleParam("--computeCore",0);
57  NPCA = getIntParam("--computeCore",1);
59  }
60  else if (checkParam("--computeStableCore"))
61  {
62  tolerance = getIntParam("--computeStableCore",0);
64  }
65 }
66 
67 // Show ====================================================================
69 {
70  if (!verbose)
71  return;
72  std::cout << "CL2D rootname: " << fnRoot << std::endl
73  << "CL2D output dir: " << fnODir << std::endl;
75  std::cout << "Tolerance: " << tolerance << std::endl;
76  else
77  std::cout << "Threshold PCA Zscore: " << thPCAZscore << std::endl
78  << "Number of PCA dimensions: " << NPCA << std::endl;
79 }
80 
81 // usage ===================================================================
83 {
84  addUsageLine("Compute the core of a CL2D clustering");
85  addParamsLine(" --root <rootname> : Rootname of the CL2D");
86  addParamsLine(" --dir <dir> : Output directory of the CL2D");
87  addParamsLine(" --computeCore <thPCAZscore=3> <NPCA=2>: The class cores are computed by thresholding the Zscore of");
88  addParamsLine(" : the class images and their projections onto a nD PCA space (by default, n=2)");
89  addParamsLine("or --computeStableCore <tolerance=1> : The stable core is formed by all the images in the class that have been");
90  addParamsLine(" : in the same class (except in tolerance levels) in the whole hierarchy.");
91  addParamsLine(" : If tolerance=0, then the stable core is formed by the set of images that");
92  addParamsLine(" : have been always together. For this reason, the stable core can be computed only");
93  addParamsLine(" : for the level such that level>tolerance");
94  addExampleLine("mpirun -np 4 `which xmipp_mpi_classify_CL2D_core_analysis` -i 2D/CL2D/run_001/results --computeCore");
95  addExampleLine("mpirun -np 4 `which xmipp_mpi_classify_CL2D_core_analysis` -i 2D/CL2D/run_001/results --computeStableCore");
96 }
97 
98 // Produce side info =====================================================
100 {
101  // Get maximum CL2D level
102  maxLevel=0;
103  FileName fnLevel;
104  do
105  fnLevel=formatString("%s/level_%02d/%s_classes.xmd",fnODir.c_str(),maxLevel++,fnRoot.c_str());
106  while (fnLevel.exists());
107  maxLevel-=2;
108  if (maxLevel==-1)
109  REPORT_ERROR(ERR_ARG_MISSING,"Cannot find any CL2D analysis in the directory given");
110 
111  // Read all the blocks available in all MetaData
112  StringVector blocksAux;
113  CL2DBlock block;
114  for (int level=0; level<=maxLevel; level++)
115  {
116  fnLevel=formatString("%s/level_%02d/%s_classes.xmd",fnODir.c_str(),level,fnRoot.c_str());
117  getBlocksInMetaDataFile(fnLevel,blocksAux);
118  block.level=level;
119  block.fnLevel=fnLevel;
120  block.fnLevelCore=fnLevel.insertBeforeExtension("_core");
121  for (size_t i=0; i<blocksAux.size(); i++)
122  {
123  if (blocksAux[i].find("class")!=std::string::npos &&
124  blocksAux[i].find("images")!=std::string::npos)
125  {
126  block.block=blocksAux[i];
127  blocks.push_back(block);
128  }
129  }
130  }
131 
132  // Create a task file distributor for all blocks
133  size_t Nblocks=blocks.size();
134  taskDistributor=std::make_unique<MpiTaskDistributor>(Nblocks,1,node);
135 
136  // Get image dimensions
137  if (Nblocks>0)
138  {
139  size_t Zdim, Ndim;
140  getImageSizeFromFilename(blocks[0].block+"@"+blocks[0].fnLevel,Xdim,Ydim,Zdim,Ndim);
141  }
142 }
143 
144 // Outliers ===============================================================
146 {
147  if (verbose && node->rank==0)
148  std::cerr << "Computing cores ...\n";
149  ProgAnalyzeCluster analyzeCluster;
150  analyzeCluster.verbose=0;
151  analyzeCluster.NPCA=NPCA;
152  analyzeCluster.Niter=10;
153  analyzeCluster.distThreshold=thPCAZscore;
154  analyzeCluster.dontMask=false;
155 
156  MetaDataVec MD;
157  size_t first, last;
158  size_t Nblocks=blocks.size();
159  if (verbose && node->rank==0)
160  init_progress_bar(Nblocks);
161  while (taskDistributor->getTasks(first, last))
162  for (size_t idx=first; idx<=last; ++idx)
163  {
164  // Remove outliers in the PCA projection
165  analyzeCluster.SFin.clear();
166  analyzeCluster.fnSel=blocks[idx].block+"@"+blocks[idx].fnLevel;
167  analyzeCluster.fnOut=blocks[idx].fnLevel.insertBeforeExtension((String)"_core_"+blocks[idx].block);
168  analyzeCluster.run();
169 
170  // Remove outliers from file
171  MD.read(analyzeCluster.fnOut);
172  MD.removeDisabled();
173  MD.write(analyzeCluster.fnOut,MD_APPEND);
174 
175  if (verbose && node->rank==0)
176  progress_bar(idx);
177  }
178  taskDistributor->wait();
179  if (verbose && node->rank==0)
180  progress_bar(Nblocks);
181 
182  // Gather all results
183  gatherResults(0,"core");
184 }
185 
186 // Outliers ===============================================================
188 {
189  if (verbose && node->rank==0)
190  std::cerr << "Computing stable cores ...\n";
191  MetaDataDb thisClass, anotherClass, commonImages, thisClassCore;
192  size_t first, last;
193  Matrix2D<unsigned char> coocurrence;
194  Matrix1D<unsigned char> maximalCoocurrence;
195  int Nblocks=blocks.size();
196  taskDistributor->reset();
197  std::vector<size_t> commonIdx;
198  std::map<String,size_t> thisClassOrder;
199  String fnImg;
200  while (taskDistributor->getTasks(first, last))
201  for (size_t idx=first; idx<=last; ++idx)
202  {
203  // Read block
204  CL2DBlock &thisBlock=blocks[idx];
205  if (thisBlock.level<=tolerance)
206  continue;
207  if (!existsBlockInMetaDataFile(thisBlock.fnLevelCore, thisBlock.block))
208  continue;
209  thisClass.read(thisBlock.block+"@"+thisBlock.fnLevelCore);
210  thisClassCore.clear();
211 
212  // Add MDL_ORDER
213  if (thisClass.size()>0)
214  {
215  size_t order=0;
216  thisClassOrder.clear();
217  for (size_t objId : thisClass.ids())
218  {
219  thisClass.getValue(MDL_IMAGE,fnImg,objId);
220  thisClassOrder[fnImg]=order++;
221  }
222 
223  // Calculate coocurrence within all blocks whose level is inferior to this
224  size_t NthisClass=thisClass.size();
225  if (NthisClass>0)
226  {
227  try {
228  coocurrence.initZeros(NthisClass,NthisClass);
229  } catch (XmippError &e)
230  {
231  std::cerr << e.what() << std::endl;
232  std::cerr << "There is a memory allocation error. Most likely there are too many images in this class ("
233  << NthisClass << " images). Consider increasing the number of initial and final classes\n";
234  REPORT_ERROR(ERR_MEM_NOTENOUGH,"While computing stable class");
235  }
236  for (int n=0; n<Nblocks; n++)
237  {
238  CL2DBlock &anotherBlock=blocks[n];
239  if (anotherBlock.level>=thisBlock.level)
240  break;
241  if (!existsBlockInMetaDataFile(anotherBlock.fnLevelCore, anotherBlock.block))
242  continue;
243  anotherClass.read(anotherBlock.block+"@"+anotherBlock.fnLevelCore);
244  anotherClass.intersection(thisClass,MDL_IMAGE);
245  commonImages.join1(anotherClass, thisClass, MDL_IMAGE,LEFT);
246  commonIdx.resize(commonImages.size());
247  size_t idx=0;
248  for (size_t objId : commonImages.ids())
249  {
250  commonImages.getValue(MDL_IMAGE,fnImg,objId);
251  commonIdx[idx++]=thisClassOrder[fnImg];
252  }
253  size_t Ncommon=commonIdx.size();
254  for (size_t i=0; i<Ncommon; i++)
255  {
256  size_t idx_i=commonIdx[i];
257  for (size_t j=i+1; j<Ncommon; j++)
258  {
259  size_t idx_j=commonIdx[j];
260  MAT_ELEM(coocurrence,idx_i,idx_j)+=1;
261  }
262  }
263  }
264  }
265 
266  // Take only those elements whose coocurrence is maximal
267  maximalCoocurrence.initZeros(NthisClass);
268  int aimedCoocurrence=thisBlock.level-tolerance;
269  FOR_ALL_ELEMENTS_IN_MATRIX2D(coocurrence)
270  if (MAT_ELEM(coocurrence,i,j)==aimedCoocurrence)
271  VEC_ELEM(maximalCoocurrence,i)=VEC_ELEM(maximalCoocurrence,j)=1;
272 
273  // Now compute core
274  for (size_t objId : thisClass.ids())
275  {
276  thisClass.getValue(MDL_IMAGE,fnImg,objId);
277  size_t idx=thisClassOrder[fnImg];
278  if (VEC_ELEM(maximalCoocurrence,idx))
279  thisClassCore.addRow(*thisClass.getRow(objId));
280  }
281  }
282  thisClassCore.write(thisBlock.fnLevel.insertBeforeExtension((String)"_stable_core_"+thisBlock.block),MD_APPEND);
283  }
284  taskDistributor->wait();
285 
286  // Gather all results
287  gatherResults(tolerance+1,"stable_core");
288 }
289 
290 void ProgClassifyCL2DCore::gatherResults(int firstLevel, const String &suffix)
291 {
292  node->barrierWait();
293  if (node->rank==0)
294  {
295  FileName fnBlock, fnClass, fnSummary, fnSummaryOriginal;
296  Image<double> classAverage;
297  // Compute class averages
298  MetaDataVec classes, MD, MDoriginal;
299  int Nblocks=blocks.size();
300  for (int level=firstLevel; level<=maxLevel; level++)
301  {
302  classes.clear();
303  fnSummary=formatString("%s/level_%02d/%s_classes_%s",fnODir.c_str(),level,fnRoot.c_str(),suffix.c_str());
304  for (int idx=0; idx<Nblocks; idx++)
305  {
306  if (blocks[idx].level!=level)
307  continue;
308  fnBlock=fnSummary+"_"+blocks[idx].block+".xmd";
309  if (fileExists(fnBlock))
310  {
311  MD.read(fnBlock);
312  MDoriginal.read(formatString("%s@%s/level_%02d/%s_classes.xmd",blocks[idx].block.c_str(),fnODir.c_str(),level,fnRoot.c_str()));
313  int classNo=textToInteger(blocks[idx].block.substr(6,6));
314  size_t classSize=MD.size();
315  fnClass.compose(classNo,fnSummary,"stk");
316  if (classSize>0)
317  getAverageApplyGeo(MD, classAverage());
318  else
319  classAverage().initZeros(Ydim,Xdim);
320  classAverage.write(fnClass);
321 
322  size_t id=classes.addObject();
323  classes.setValue(MDL_REF,classNo,id);
324  classes.setValue(MDL_IMAGE,fnClass,id);
325  classes.setValue(MDL_CLASS_COUNT,classSize,id);
326  classes.setValue(MDL_MODELFRAC,((double)classSize)/MDoriginal.size(),id);
327  }
328  }
329  classes.write((String)"classes@"+fnSummary+".xmd");
330  }
331 
332  // Write the rest of blocks
333  for (int idx=0; idx<Nblocks; idx++)
334  {
335  fnSummary=formatString("%s/level_%02d/%s_classes_%s",fnODir.c_str(),blocks[idx].level,fnRoot.c_str(),suffix.c_str());
336  fnBlock=fnSummary+"_"+blocks[idx].block+".xmd";
337  if (fileExists(fnBlock))
338  {
339  MD.read(fnBlock);
340  unlink(fnBlock.c_str());
341  MD.write(blocks[idx].block+"@"+fnSummary+".xmd",MD_APPEND);
342  }
343  }
344  }
345  node->barrierWait();
346 }
347 
349 {
350  node->barrierWait();
351  if (node->rank==0)
352  {
353  FileName fnSummaryOriginal;
354  MetaDataVec MDoriginal;
355  int Nblocks=blocks.size();
356 
357  // Read and write reference to 2D classes blocks
358  for (int idx=0; idx<Nblocks; idx++)
359  {
360  int classNo=textToInteger(blocks[idx].block.substr(6,6));
361  fnSummaryOriginal=formatString("%s@%s/level_%02d/%s_classes.xmd",blocks[idx].block.c_str(),fnODir.c_str(),
362  blocks[idx].level,fnRoot.c_str());
363  MDoriginal.read(fnSummaryOriginal);
364  MDoriginal.fillConstant(MDL_REF, integerToString(classNo));
365  MDoriginal.write(fnSummaryOriginal,MD_APPEND);
366  }
367  }
368  node->barrierWait();
369 }
370 
371 // Run ====================================================================
373 {
374  show();
375  produceSideInfo();
377  if (action==COMPUTE_CORE)
378  computeCores();
379  else
381 }
#define FOR_ALL_ELEMENTS_IN_MATRIX2D(m)
Definition: matrix2d.h:104
Argument missing.
Definition: xmipp_error.h:114
ProgClassifyCL2DCore(int argc, char **argv)
Empty constructor.
void init_progress_bar(long total)
void getImageSizeFromFilename(const FileName &filename, size_t &Xdim, size_t &Ydim, size_t &Zdim, size_t &Ndim, MDLabel image_label)
#define VEC_ELEM(v, i)
Definition: matrix1d.h:245
double getDoubleParam(const char *param, int arg=0)
void read(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=nullptr, bool decomposeStack=true) override
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
void intersection(const MetaDataDb &mdIn, const MDLabel label)
bool getValue(MDObject &mdValueOut, size_t id) const override
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
FileName insertBeforeExtension(const String &str) const
bool existsBlockInMetaDataFile(const FileName &inFileWithBlock)
There is not enough memory for allocation.
Definition: xmipp_error.h:166
void write(const FileName &name="", size_t select_img=ALL_IMAGES, bool isStack=false, int mode=WRITE_OVERWRITE, CastWriteMode castMode=CW_CAST, int _swapWrite=0)
void run()
Main routine.
void compose(const String &str, const size_t no, const String &ext="")
void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const
String integerToString(int I, int _width, char fill_with)
virtual IdIteratorProxy< false > ids()
void getBlocksInMetaDataFile(const FileName &inFile, StringVector &blockList)
std::vector< String > StringVector
Definition: xmipp_strings.h:35
size_t size() const override
#define i
void produceSideInfo()
Produce side info.
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
void clear() override
std::unique_ptr< MDRow > getRow(size_t id) override
Definition: mask.h:36
glob_log first
int argc
Original command line arguments.
Definition: xmipp_program.h:86
std::ostream & operator<<(std::ostream &out, const CL2DBlock &block)
Show CL2Dblock.
const char * getParam(const char *param, int arg=0)
bool setValue(const MDObject &mdValueIn, size_t id)
size_t addObject() override
size_t addRow(const MDRow &row) override
void progress_bar(long rlen)
const char ** argv
Definition: xmipp_program.h:87
void clear() override
Definition: metadata_db.cpp:54
std::unique_ptr< MpiTaskDistributor > taskDistributor
void addExampleLine(const char *example, bool verbatim=true)
void produceClassInfo()
Produce class input format.
Model fraction (alpha_k) for a Maximum Likelihood model.
int verbose
Verbosity level.
void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const override
void fillConstant(MDLabel label, const String &value) override
bool exists() const
void initZeros()
Definition: matrix1d.h:592
size_t size() const override
#define j
Class to which the image belongs (int)
Number of images assigned to the same class as this image.
virtual void removeDisabled()
void gatherResults(int firstLevel, const String &suffix)
Gather results.
std::vector< CL2DBlock > blocks
std::string String
Definition: xmipp_strings.h:34
void readParams()
Read argument from command line.
bool fileExists(const char *filename)
String formatString(const char *format,...)
void read(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=NULL, bool decomposeStack=true) override
void initZeros()
Definition: matrix2d.h:626
int textToInteger(const char *str)
void getAverageApplyGeo(const MetaData &md, MultidimArray< double > &_ave, MDLabel image_label)
void join1(const MetaDataDb &mdInLeft, const MetaDataDb &mdInRight, const MDLabel label, JoinType type=LEFT)
bool checkParam(const char *param)
void addUsageLine(const char *line, bool verbatim=false)
int getIntParam(const char *param, int arg=0)
int * n
Name of an image (std::string)
void addParamsLine(const String &line)
std::shared_ptr< MpiNode > node