Xmipp  v3.23.11-Nereus
analyze_cluster.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Carlos Oscar coss@cnb.csic.es (2010)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include "analyze_cluster.h"
27 #include <core/args.h>
28 #include <data/filters.h>
29 #include <data/mask.h>
30 #include "core/metadata_sql.h"
31 
32 // Minimum number of images to perform a PCA
33 const size_t Nmin=10;
34 
35 // Read arguments ==========================================================
37 {
38  fnSel = getParam("-i");
39  fnRef = getParam("--ref");
40  fnOut = getParam("-o");
41  if (checkParam("--basis"))
42  fnOutBasis = getParam("--basis");
43  NPCA = getIntParam("--NPCA");
44  Niter = getIntParam("--iter");
45  distThreshold = getDoubleParam("--maxDist");
46  dontMask = checkParam("--dontMask");
47 }
48 
49 // Show ====================================================================
51 {
52  if (verbose>0)
53  std::cerr
54  << "Input metadata file: " << fnSel << std::endl
55  << "Reference: " << fnRef << std::endl
56  << "Output metadata: " << fnOut << std::endl
57  << "Output basis stack: " << fnOutBasis << std::endl
58  << "PCA dimension: " << NPCA << std::endl
59  << "Iterations: " << Niter << std::endl
60  << "Maximum distance: " << distThreshold << std::endl
61  << "Don't mask: " << dontMask << std::endl
62  ;
63 }
64 
65 // usage ===================================================================
67 {
68  addUsageLine("Score the images in a cluster according to their PCA projection");
69  addUsageLine("It is assumed that the cluster is aligned as is the case of the output of CL2D or ML2D");
70  addParamsLine(" -i <metadatafile> : metadata file with images assigned to the cluster");
71  addParamsLine(" -o <metadatafile> : output metadata");
72  addParamsLine(" [--ref <img=\"\">] : if an image is provided, differences are computed with respect to it");
73  addParamsLine(" [--basis <stackName>] : write the average (image 1), standard deviation (image 2)");
74  addParamsLine(" : and basis of the PCA in a stack");
75  addParamsLine(" [--NPCA <dim=2>] : PCA dimension");
76  addParamsLine(" [--iter <N=10>] : Number of iterations");
77  addParamsLine(" [--maxDist <d=3>] : Maximum distance");
78  addParamsLine(" : Set to -1 if you don't want to filter images");
79  addParamsLine(" [--dontMask] : Don't use a circular mask");
80  addExampleLine("xmipp_classify_analyze_cluster -i images.sel --ref referenceImage.xmp -o sortedImages.xmd --basis basis.stk");
81 }
82 
83 // Produce side info ======================================================
84 //#define DEBUG
86 {
87  basis = fnOutBasis!="";
88 
89  // Read input selfile and reference
90  if (SFin.size()==0)
91  {
92  SFin.read(fnSel);
93  if (SFin.size()==0)
94  return;
97  MetaDataVec SFaux;
99  SFin=SFaux;
100  }
101  bool subtractRef=false;
102  Image<double> Iref;
103  if (fnRef!="")
104  {
105  subtractRef=true;
106  Iref.read(fnRef);
107  Iref().setXmippOrigin();
108  }
109 
110  // Prepare mask
111  size_t Xdim;
112  size_t Ydim;
113  size_t Zdim;
114  size_t Ndim;
115  getImageSize(SFin,Xdim,Ydim,Zdim,Ndim,image_label);
116  mask.resize(Ydim,Xdim);
118  if (dontMask)
119  mask.initConstant(1);
120  else
122  auto Npixels=(int)mask.sum();
123 
124  // Read all images in the class and subtract the mean
125  // once aligned
126  Image<double> I;
127  Image<double> Iaux;
128  pcaAnalyzer.clear();
130  if (verbose>0)
131  {
132  std::cerr << "Processing cluster ...\n";
134  }
135  int n=0;
136  MultidimArray<float> v(Npixels);
137  const MultidimArray<double> &mIref=Iref();
138  MultidimArray<double> Ialigned;
139  MultidimArray<double> ImirrorAligned;
141  AlignmentAux aux;
142  CorrelationAux aux2;
144  FileName fnImg;
145  for (size_t objId : SFin.ids())
146  {
147  SFin.getValue(image_label,fnImg, objId);
148  I.readApplyGeo( fnImg, SFin, objId);
149  if (XSIZE(I())!=Xdim || YSIZE(I())!=Ydim)
150  REPORT_ERROR(ERR_MULTIDIM_SIZE,"All images must be of the same size");
151  I().setXmippOrigin();
152  int idx=0;
153  if (subtractRef)
154  {
155  // Choose between this image and its mirror
156  Ialigned=I();
157  ImirrorAligned=Ialigned;
158  ImirrorAligned.selfReverseX();
159  ImirrorAligned.setXmippOrigin();
160  alignImages(mIref,Ialigned,M,xmipp_transformation::WRAP,aux,aux2,aux3);
161  alignImages(mIref,ImirrorAligned,M,xmipp_transformation::WRAP,aux,aux2,aux3);
162  double corr=correlationIndex(mIref,Ialigned,&mask);
163  double corrMirror=correlationIndex(mIref,ImirrorAligned,&mask);
164  if (corr>corrMirror)
165  I()=Ialigned;
166  else
167  I()=ImirrorAligned;
168 
170  if (A2D_ELEM(mask,i,j))
171  A1D_ELEM(v,idx++)=IMGPIXEL(I,i,j)-A2D_ELEM(mIref,i,j);
172  }
173  else
174  {
176  if (A2D_ELEM(mask,i,j))
177  A1D_ELEM(v,idx++)=IMGPIXEL(I,i,j);
178  }
180  if ((++n)%10==0 && verbose>0)
181  progress_bar(n);
182  }
183  if (verbose>0)
185 }
186 #undef DEBUG
187 
188 // Produce basis ==========================================================
190 {
191  int iimax=pcaAnalyzer.PCAbasis.size();
192  basis.initZeros(iimax,1,YSIZE(mask),XSIZE(mask));
193  for (size_t ii=0; ii<pcaAnalyzer.PCAbasis.size(); ii++)
194  {
195  int idx=0;
196  double *ptrBasis=&DIRECT_NZYX_ELEM(basis, ii, 0, 0, 0);
197  const MultidimArray<double> &PCAbasis_ii=pcaAnalyzer.PCAbasis[ii];
199  if (DIRECT_A2D_ELEM(mask,i,j))
200  ptrBasis[i*XSIZE(mask)+j]=A1D_ELEM(PCAbasis_ii,idx++);
201  }
202 }
203 
204 // Run ====================================================================
206 {
207  show();
208  produceSideInfo();
209  size_t N=SFin.size();
210 
211  // Output
212  SFout=SFin;
213  if (N>Nmin)
214  {
216 
217  for (size_t n=0; n<N; n++)
218  {
219  int trueIdx=pcaAnalyzer.getSorted(n);
220  double zscore=pcaAnalyzer.getSortedZscore(n);
221  SFout.setValue(MDL_ZSCORE, zscore, SFout.getRowId(trueIdx));
222  if (zscore<distThreshold || distThreshold<0)
223  SFout.setValue(MDL_ENABLED,1, SFout.getRowId(trueIdx));
224  else
225  SFout.setValue(MDL_ENABLED,-1, SFout.getRowId(trueIdx));
226  }
227  }
229  if (basis && N>Nmin)
230  {
233  produceBasis(basis());
234  basis.write(fnOutBasis);
235  }
236 }
void removeObjects(const std::vector< size_t > &toRemove) override
void init_progress_bar(long total)
#define YSIZE(v)
#define A2D_ELEM(v, i, j)
void resize(size_t Ndim, size_t Zdim, size_t Ydim, size_t Xdim, bool copy=true)
void addVector(const MultidimArray< float > &_v)
Add vector.
Definition: basic_pca.h:100
void removeDuplicates(MetaData &MDin, MDLabel label=MDL_UNDEFINED)
double getDoubleParam(const char *param, int arg=0)
#define DIRECT_NZYX_ELEM(v, l, k, i, j)
void read(const FileName &inFile, const std::vector< MDLabel > *desiredLabels=nullptr, bool decomposeStack=true) override
double alignImages(const MultidimArray< double > &Iref, const AlignmentTransforms &IrefTransforms, MultidimArray< double > &I, Matrix2D< double > &M, bool wrap, AlignmentAux &aux, CorrelationAux &aux2, RotationalCorrelationAux &aux3)
Definition: filters.cpp:2047
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
#define FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(m)
int getSorted(int n)
Definition: basic_pca.h:152
void BinaryCircularMask(MultidimArray< int > &mask, double radius, int mode, double x0, double y0, double z0)
Definition: mask.cpp:193
#define DIRECT_A2D_ELEM(v, i, j)
void write(const FileName &name="", size_t select_img=ALL_IMAGES, bool isStack=false, int mode=WRITE_OVERWRITE, CastWriteMode castMode=CW_CAST, int _swapWrite=0)
void run()
Main routine.
double correlationIndex(const MultidimArray< T > &x, const MultidimArray< T > &y, const MultidimArray< int > *mask=NULL, MultidimArray< double > *Contributions=NULL)
#define A1D_ELEM(v, i)
void getImageSize(const MetaData &md, size_t &Xdim, size_t &Ydim, size_t &Zdim, size_t &Ndim, MDLabel image_label)
void initConstant(T val)
void write(const FileName &outFile, WriteModeMetaData mode=MD_OVERWRITE) const
Incorrect MultidimArray size.
Definition: xmipp_error.h:174
virtual IdIteratorProxy< false > ids()
int readApplyGeo(const FileName &name, const MDRow &row, const ApplyGeoParams &params=DefaultApplyGeoParams)
size_t size() const override
#define i
Is this image enabled? (int [-1 or 1])
#define FOR_ALL_ELEMENTS_IN_ARRAY2D(m)
void produceSideInfo(MDLabel image_label=MDL_IMAGE)
Produce side info.
const char * getParam(const char *param, int arg=0)
void evaluateZScore(int NPCA, int Niter, bool trained=false, const char *filename="temp.txt", int numdesc=0)
Definition: basic_pca.cpp:384
bool setValue(const MDObject &mdValueIn, size_t id)
PCAMahalanobisAnalyzer pcaAnalyzer
void produceBasis(MultidimArray< double > &basis)
Produce basis.
#define XSIZE(v)
void progress_bar(long rlen)
void reserve(int newSize)
Resize.
Definition: basic_pca.h:94
void addExampleLine(const char *example, bool verbatim=true)
std::vector< MultidimArray< double > > PCAbasis
Definition: basic_pca.h:69
const size_t Nmin
int verbose
Verbosity level.
#define j
void deleteFile() const
size_t getRowId(const MetaDataVecRow &) const
bool getValue(MDObject &mdValueOut, size_t id) const override
MultidimArray< int > mask
double getSortedZscore(int n)
Definition: basic_pca.h:146
Global Z Score (double)
void readParams()
Read argument from command line.
void clear()
Clear.
Definition: basic_pca.h:84
void defineParams()
Define parameters.
bool checkParam(const char *param)
int read(const FileName &name, DataMode datamode=DATA, size_t select_img=ALL_IMAGES, bool mapData=false, int mode=WRITE_READONLY)
void addUsageLine(const char *line, bool verbatim=false)
void initZeros(const MultidimArray< T1 > &op)
bool containsLabel(const MDLabel label) const override
int getIntParam(const char *param, int arg=0)
int * n
Name of an image (std::string)
MDLabel
double sum() const
void addParamsLine(const String &line)
#define IMGPIXEL(I, i, j)
constexpr int INNER_MASK
Definition: mask.h:47