Xmipp  v3.23.11-Nereus
basic_pca.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Carlos Oscar Sorzano (coss@cnb.csic.es)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include <algorithm>
27 #include <fstream>
28 #include "basic_pca.h"
29 #include "core/matrix2d.h"
30 #include "core/xmipp_funcs.h"
31 
32 /* Subtract average ------------------------------------------------------- */
34 {
35  int N=v.size();
36  if (N==0)
37  return;
38  // Compute average
39 
40  typeCast(v[0],avg);
41  for (int n=1; n<N; n++)
42  {
43  MultidimArray<float> &aux=v[n];
46  }
47  avg/=N;
48 
49  // Subtract average and compute stddev
51  typeCast(avg,avgF);
52 
53  for (int n=0; n<N; n++)
54  {
55  v[n]-=avgF;
56 
57  }
58 }
59 
60 /* Standardize variables -------------------------------------------------- */
62 {
63  int N=v.size();
64  if (N<=1)
65  return;
66  // Compute average
68  typeCast(v[0],avg);
69  for (int n=1; n<N; n++)
70  {
71  MultidimArray<float> &aux=v[n];
74  }
75  avg/=N;
76 
77  // Subtract average and compute stddev
79  typeCast(avg,avgF);
80  MultidimArray<double> stddev;
81  stddev.initZeros(avg);
82  for (int n=0; n<N; n++)
83  {
84  v[n]-=avgF;
85  MultidimArray<float> &aux=v[n];
87  {
88  float f=DIRECT_A1D_ELEM(aux,i);
89  DIRECT_A1D_ELEM(stddev,i)+=f*f;
90  }
91  }
92  stddev/=N-1;
94  DIRECT_A1D_ELEM(stddev,i)=sqrt(DIRECT_A1D_ELEM(stddev,i));
95 
96  // Divide by stddev
97  MultidimArray<float> istddevF;
98  istddevF.resize(stddev);
99  size_t NoInformation=0;
102  DIRECT_A1D_ELEM(istddevF,i)=(float)(1.0/DIRECT_A1D_ELEM(stddev,i));
103  else
104  {
105  DIRECT_A1D_ELEM(istddevF,i)=0.0;
106  NoInformation++;
107  }
108  if (NoInformation==XSIZE(stddev))
109  REPORT_ERROR(ERR_NUMERICAL,"There is no information to perform PCA");
110  for (int n=0; n<N; n++)
111  {
112  MultidimArray<float> &aux=v[n];
114  DIRECT_A1D_ELEM(aux,i)*=DIRECT_A1D_ELEM(istddevF,i);
115  }
116 }
117 
118 /* Add vector ------------------------------------------------------------- */
120 {
121  int N=v.size();
122  int NPCA=PCAbasis.size();
123  CtY.initZeros(NPCA,N);
124  for (int ii=0; ii<N; ii++)
125  {
126  const MultidimArray<float> &Iii=v[ii];
127  const size_t unroll=4;
128  size_t nmax=unroll*(MULTIDIM_SIZE(Iii)/unroll);
129  for (int jj=0; jj<NPCA; jj++)
130  {
131  const MultidimArray<double> &Ijj=PCAbasis[jj];
132 
133  double dotProduct=0;
134  size_t n=0;
135  const float *ptrii=MULTIDIM_ARRAY(Iii);
136  const double *ptrjj=MULTIDIM_ARRAY(Ijj);
137  for (size_t n=0; n<nmax; n+=unroll, ptrii+=unroll, ptrjj+=unroll)
138  {
139  dotProduct += *ptrii * *ptrjj;
140  dotProduct += *(ptrii+1) * *(ptrjj+1);
141  dotProduct += *(ptrii+2) * *(ptrjj+2);
142  dotProduct += *(ptrii+3) * *(ptrjj+3);
143  }
144  for (n=nmax, ptrii=MULTIDIM_ARRAY(Iii)+nmax, ptrjj=MULTIDIM_ARRAY(Ijj)+nmax;
145  n<MULTIDIM_SIZE(Iii); ++n, ++ptrii, ++ptrjj)
146  dotProduct += *ptrii * *ptrjj;
147  MAT_ELEM(CtY,jj,ii)=dotProduct;
148  }
149  }
150 }
151 
152 /* Add vector ------------------------------------------------------------- */
154 {
155 
156  int N=recons.size();
157  int NPCA=PCAbasis.size();
158 
159  for (int ii=0; ii<N; ii++)
160  {
162  for (int jj=0; jj<NPCA; jj++)
163  {
164  const MultidimArray<double> &Ijj=PCAbasis[jj];
165  DIRECT_A1D_ELEM(recons[ii],i)= DIRECT_A1D_ELEM(Ijj,i)*MAT_ELEM(CtY,jj,ii)+(float)DIRECT_A1D_ELEM(avg,i);
166  }
167  }
168 }
169 
170 void PCAMahalanobisAnalyzer::learnPCABasis(size_t NPCA, size_t Niter)
171 {
172  // Take the first vectors for the PCA basis
174  NPCA=XMIPP_MIN(NPCA,v.size());
175  std::vector<size_t> used;
176  PCAbasis.clear();
177  for (size_t n=0; n<NPCA; n++)
178  {
179  size_t nRandom;
180  do {
181  nRandom=(size_t)round((v.size()-1)*rnd_unif(0,1));
182  } while (std::find(used.begin(),used.end(),nRandom)!=used.end());
183  typeCast(v[nRandom],vPCA);
184  PCAbasis.push_back(vPCA);
185  used.push_back(nRandom);
186  }
187 
188  size_t N=v.size();
189  for (size_t n=0; n<Niter; n++)
190  {
191  // E-step ..........................................................
192  // Compute C^t*C
193  Matrix2D<double> CtC(NPCA,NPCA);
194  for (size_t ii=0; ii<NPCA; ii++)
195  {
196  const MultidimArray<double> &Iii=PCAbasis[ii];
197  for (size_t jj=ii; jj<NPCA; jj++)
198  {
199  const MultidimArray<double> &Ijj=PCAbasis[jj];
200  CtC(ii,jj)=0;
202  MAT_ELEM(CtC,ii,jj)+=
204 
205  if (ii!=jj)
206  CtC(jj,ii)=CtC(ii,jj);
207 
208  }
209  }
210 
211  Matrix2D<double> CtY;
213  projectOnPCABasis(CtY);
214  X=CtC.inv()*CtY;
215 
216  // M-step ..........................................................
217  Matrix2D<double> XtXXtinv=X.transpose()*(X*X.transpose()).inv();
218  for (size_t ii=0;ii<NPCA;ii++)
219  {
221  const size_t unroll=4;
222  size_t nmax=unroll*(MULTIDIM_SIZE(Ipca)/unroll);
223  Ipca.initZeros();
224  for (size_t jj=0; jj<N; jj++)
225  {
226  const MultidimArray<float> &I=v[jj];
227  const float *ptrI=MULTIDIM_ARRAY(I);
228  double *ptrPCA=MULTIDIM_ARRAY(Ipca);
229  double val=MAT_ELEM(XtXXtinv,jj,ii);
230  size_t n;
231  for (n=0; n<nmax; n+=unroll, ptrPCA+=unroll, ptrI+=unroll)
232  {
233  *ptrPCA += *ptrI * val;
234  *(ptrPCA+1) += *(ptrI+1) * val;
235  *(ptrPCA+2) += *(ptrI+2) * val;
236  *(ptrPCA+3) += *(ptrI+3) * val;
237  }
238  for (n=nmax, ptrI=MULTIDIM_ARRAY(I)+nmax, ptrPCA=MULTIDIM_ARRAY(Ipca)+nmax;
239  n<MULTIDIM_SIZE(I); ++n, ++ptrI, ++ptrPCA)
240  *ptrPCA += *ptrI * val;
241  }
242  }
243  }
244 
245  //Obtain the Orthonormal vectors for the C (According to paper)
246  gramSchmidt();
247 
248  //Generate a Matrix for true PCA and compute the average C'*data
249  Matrix2D<double> data;
250  double *refData;
251  MultidimArray<double> average;
252  data.initZeros(NPCA,v.size());
253  average.initZeros(NPCA);
254  refData = &MAT_ELEM(data,0,0);
255  for (size_t ii=0;ii<NPCA;ii++)
256  {
258  for (size_t jj=0;jj<v.size();jj++)
259  {
260  MultidimArray<float> &D=v[jj];
262  (*refData) += DIRECT_A1D_ELEM(C,i)*DIRECT_A1D_ELEM(D,i);
263  DIRECT_A1D_ELEM(average,ii)+=(*refData)++;
264  }
265  }
266  average/=v.size();
268  MAT_ELEM(data,i,j)-=DIRECT_A1D_ELEM(average,i);
269 
270  Matrix2D<double> covarMatrix;
273  covarMatrix=data*data.transpose();
274  svdcmp(covarMatrix,u,w,v);
275 
277  size_t xsize=XSIZE(PCAbasis[0]);
279  for (size_t i=0;i<xsize;i++)
280  {
281  temp.initZeros(1,NPCA);
282  for (size_t j=0;j<NPCA;j++)
283  for (size_t k=0;k<NPCA;k++)
284  {
285  const MultidimArray<double> &Iii=PCAbasis[k];
286  DIRECT_A1D_ELEM(temp,j)+=DIRECT_A1D_ELEM(Iii,i)* MAT_ELEM(v,k,j);
287  }
288  for (size_t k=0;k<NPCA;k++)
289  {
290  const MultidimArray<double> &Iii=PCAbasis[k];
291  DIRECT_A1D_ELEM(Iii,i)=DIRECT_A1D_ELEM(temp,k);
292  }
293  }
294 
295  // Normalize output vectors
296  for (size_t ii=0;ii<NPCA;ii++)
297  {
298  double norm=sqrt(PCAbasis[ii].sum2());
299  PCAbasis[ii]/=norm;
300  }
301 }
302 
315 {
316  MultidimArray<double> vArray;
317  MultidimArray<double> orthonormalBasis;
318  for (size_t ii=0;ii<PCAbasis.size();ii++)
319  {
322  if (ii==0)
323  {
324  double inorm=1.0/sqrt(Iii.sum2());
325  for (size_t i=0;i<XSIZE(Iii);i++)
326  DIRECT_A1D_ELEM(Iii,i)=DIRECT_A1D_ELEM(Iii,i)*inorm;
327  continue;
328  }
330  vArray.initZeros(static_cast<int>(XSIZE(Iii)));
331  orthonormalBasis.initZeros(static_cast<int>(XSIZE(Iii)));
332  for (size_t jj=0;jj<ii;jj++)
333  {
334  const MultidimArray<double> &Ijj=PCAbasis[jj];
335  double dotProduct=Iii.dotProduct(Ijj);
337  DIRECT_A1D_ELEM(vArray,i)-=dotProduct*DIRECT_A1D_ELEM(Ijj,i);
338  }
339  FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(orthonormalBasis)
340  DIRECT_A1D_ELEM(orthonormalBasis,i)= DIRECT_A1D_ELEM(Iii,i)+ DIRECT_A1D_ELEM(vArray,i);
341  double inorm=1.0/sqrt(orthonormalBasis.sum2());
342  for (size_t i=0;i<XSIZE(Iii);i++)
343  DIRECT_A1D_ELEM(Iii,i)=DIRECT_A1D_ELEM(orthonormalBasis,i)*inorm;
344  }
345 }
346 
347 /* Compute Statistics ----------------------------------------------------- */
349  MultidimArray<double> & stddev)
350 {
351  int N=v.size();
352  if (N==0)
353  {
354  avg.clear();
355  stddev.clear();
356  return;
357  }
358  typeCast(v[0],avg);
360  for (int n=1; n<N; n++)
361  {
362  typeCast(v[n],aux);
363  avg+=aux;
364  }
365  avg/=N;
366  stddev.initZeros(avg);
367  for (int n=0; n<N; n++)
368  {
369  typeCast(v[n],aux);
370  aux-=avg;
371  aux*=aux;
372  stddev+=aux;
373  }
374  if (1 == N) {
375  REPORT_ERROR(ERR_NUMERICAL, "N was 1 (one), which would lead to division by zero\n");
376  }
377  double iN_1=1.0/(N-1);
379  DIRECT_A1D_ELEM(stddev,i)=sqrt(DIRECT_A1D_ELEM(stddev,i)*iN_1);
380 }
381 
382 /* Evaluate score --------------------------------------------------------- */
383 //#define DEBUG
384 void PCAMahalanobisAnalyzer::evaluateZScore(int NPCA, int Niter, bool trained,
385  const char* fileName, int numdesc)
386 {
387 
388  int N=v.size();
389  if (N==0)
390  {
391  Zscore.clear();
392  idx.clear();
393  return;
394  }
395  else if (N==1)
396  {
397  Zscore.initZeros(N);
399  return;
400  }
401 
402 
403 #ifdef DEBUG
404  std::cout << "Input vectors\n";
405  for (int n=0; n<N; n++)
406  {
408  std::cout << DIRECT_A1D_ELEM(v[n],i) << " ";
409  std::cout << std::endl;
410  }
411 #endif
412  subtractAvg();
413 
414 
415 #ifdef DEBUG
416 
417  std::cout << "\n\nInput vectors after normalization\n";
418  for (int n=0; n<N; n++)
419  {
421  std::cout << DIRECT_A1D_ELEM(v[n],i) << " ";
422  std::cout << std::endl;
423  }
424 #endif
425 
426  if (!trained)
427  {
428  learnPCABasis(NPCA, Niter);
429  std::ofstream fhOutPCA(fileName);
430  if (fhOutPCA.is_open())
431  {
432  for (int n=0; n<NPCA; n++)
433  {
434  for (int i=0; i<numdesc; i++)
435  fhOutPCA << DIRECT_A1D_ELEM(PCAbasis[n],i) << " ";
436  fhOutPCA << std::endl;
437  }
438  fhOutPCA.close();
439  }
440  else std::cerr << "Unable to open file " << fileName << std::endl;
441  }
442  else
443  {
444  PCAbasis.clear();
445  PCAbasis.resize(NPCA, numdesc);
446  std::ifstream fhOutPCA;
447  fhOutPCA.open(fileName);
448  if (fhOutPCA.is_open())
449  {
450  for (int n=0; n<NPCA; n++)
451  {
452  for (int i=0; i<numdesc; i++)
453  fhOutPCA >> DIRECT_A1D_ELEM(PCAbasis[n],i);
454  }
455  fhOutPCA.close();
456  }
457  else std::cerr << "Unable to open file " << fileName << std::endl;
458  }
459 
460 #ifdef DEBUG
461 
462  std::cout << "\n\nPCA basis\n";
463  for (int n=0; n<NPCA; n++)
464  {
466  std::cout << DIRECT_A1D_ELEM(PCAbasis[n],i) << " ";
467  std::cout << std::endl;
468  }
469 #endif
470 
471  Matrix2D<double> proj;
472  projectOnPCABasis(proj);
473 
474 
475 #ifdef DEBUG
476 
477  std::cout << "\n\nPCA projected values\n" << proj << std::endl;
478 #endif
479 
480  // Estimate covariance matrix
481  Matrix2D<double> cov(NPCA,NPCA);
482  for (int ii=0; ii<N; ii++)
483  {
484  for (int i=0; i<NPCA; i++)
485  for (int j=0; j<NPCA; j++)
486  MAT_ELEM(cov,i,j)+=MAT_ELEM(proj,i,ii)*MAT_ELEM(proj,j,ii);
487  }
488  cov/=N;
489 #ifdef DEBUG
490 
491  std::cout << "\n\nCovariance matrix\n" << cov << std::endl;
492 #endif
493 
494  Matrix2D<double> covinv=cov.inv();
495  Zscore.initZeros(N);
496  for (int ii=0; ii<N; ii++)
497  {
498  Zscore(ii)=0;
499  for (int i=0; i<NPCA; i++)
500  {
501  double xi=MAT_ELEM(proj,i,ii);
502  for (int j=0; j<NPCA; j++)
503  DIRECT_A1D_ELEM(Zscore,ii)+=xi*MAT_ELEM(proj,j,ii)*MAT_ELEM(covinv,i,j);
504  }
505  Zscore(ii)=sqrt(fabs(Zscore(ii)));
506  }
507 #ifdef DEBUG
508  std::cout << "\n\nZscores\n";
510  std::cout << DIRECT_A1D_ELEM(Zscore,i) << " ";
511  std::cout << std::endl;
512 #endif
513 
515 }
516 #undef DEBUG
517 
518 // Empty constructor
520 {
521  N=0;
522 }
523 
524 // Add a new vector
526 {
527  if (N==0)
528  {
529  ysum=y;
530  yxt.resizeNoCopy(y);
531  c1.resizeNoCopy(y);
532  ycentered.resizeNoCopy(y);
533  N=1;
534  zn=0;
535  }
536  else if (N==1)
537  {
538  xxt=0;
540  {
541  double yval=DIRECT_MULTIDIM_ELEM(y,n);
542  DIRECT_MULTIDIM_ELEM(yxt,n)=DIRECT_MULTIDIM_ELEM(c1,n)=yval-0.5*DIRECT_MULTIDIM_ELEM(ysum,n);// c1=y-ysum/2
544  }
545  c1/=sqrt(xxt); // Normalize c1
546  zn=1;
547  N=2;
548  }
549  else
550  {
551  double iN=1.0/N;
552  zn=0;
554  {
555  DIRECT_MULTIDIM_ELEM(ycentered,n)=DIRECT_MULTIDIM_ELEM(y,n)-iN*DIRECT_MULTIDIM_ELEM(ysum,n);// ycentered=y-ysum/N
556  zn+=DIRECT_MULTIDIM_ELEM(c1,n)*DIRECT_MULTIDIM_ELEM(ycentered,n); // zn = ycentered^T c1
557  }
558  if (fabs(zn)>maxzn)
559  return;
560 
561  xxt+=zn*zn;
562  double ixxt=1.0/xxt;
563  double c1norm=0;
565  {
566  DIRECT_MULTIDIM_ELEM(ysum,n)+=DIRECT_MULTIDIM_ELEM(y,n); // ysum+=y
567  DIRECT_MULTIDIM_ELEM(yxt,n)+=zn*DIRECT_MULTIDIM_ELEM(ycentered,n); // yxt+=zn*ycentered
568  DIRECT_MULTIDIM_ELEM(c1,n)=ixxt*DIRECT_MULTIDIM_ELEM(yxt,n); // c1=yxt/xxt
570  }
571  c1/=sqrt(c1norm);
572  N++;
573  }
574 }
#define FOR_ALL_ELEMENTS_IN_MATRIX2D(m)
Definition: matrix2d.h:104
double xi
int * nmax
PCAonline()
Empty constructor.
Definition: basic_pca.cpp:519
void resize(size_t Ndim, size_t Zdim, size_t Ydim, size_t Xdim, bool copy=true)
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
#define MULTIDIM_SIZE(v)
void projectOnPCABasis(Matrix2D< double > &CtY)
Project on basis.
Definition: basic_pca.cpp:119
void addVector(MultidimArray< double > &y)
Definition: basic_pca.cpp:525
void sqrt(Image< double > &op)
double dotProduct(const MultidimArray< T > &op1)
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
static double * y
std::vector< MultidimArray< float > > v
Definition: basic_pca.h:66
void subtractAvg()
Subtract average.
Definition: basic_pca.cpp:33
#define MULTIDIM_ARRAY(v)
void inv(Matrix2D< T > &result) const
Definition: matrix2d.cpp:663
Matrix2D< T > transpose() const
Definition: matrix2d.cpp:1314
T norm(const std::vector< T > &v)
Definition: vector_ops.h:399
MultidimArray< int > idx
Definition: basic_pca.h:78
#define FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(v)
void computeStatistics(MultidimArray< double > &avg, MultidimArray< double > &stddev)
Compute Statistics.
Definition: basic_pca.cpp:348
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
double rnd_unif()
void standardarizeVariables()
Standardarize variables.
Definition: basic_pca.cpp:61
#define DIRECT_A1D_ELEM(v, i)
MultidimArray< double > Zscore
Definition: basic_pca.h:75
void evaluateZScore(int NPCA, int Niter, bool trained=false, const char *filename="temp.txt", int numdesc=0)
Definition: basic_pca.cpp:384
double * f
#define XMIPP_EQUAL_ACCURACY
Definition: xmipp_macros.h:119
#define XSIZE(v)
#define FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(v)
Error related to numerical calculation.
Definition: xmipp_error.h:179
#define DIRECT_MULTIDIM_ELEM(v, n)
std::vector< MultidimArray< double > > PCAbasis
Definition: basic_pca.h:69
#define XMIPP_MIN(x, y)
Definition: xmipp_macros.h:181
void svdcmp(const Matrix2D< T > &a, Matrix2D< double > &u, Matrix1D< double > &w, Matrix2D< double > &v)
Definition: matrix2d.cpp:125
#define j
Matrix1D< double > w
Definition: basic_pca.h:81
double sum2() const
int round(double x)
Definition: ap.cpp:7245
void reconsFromPCA(const Matrix2D< double > &CtY, std::vector< MultidimArray< float > > &recons)
Reconstruct from PCA basis.
Definition: basic_pca.cpp:153
T dotProduct(const Matrix1D< T > &v1, const Matrix1D< T > &v2)
Definition: matrix1d.h:1140
void typeCast(const Matrix1D< T1 > &v1, Matrix1D< T2 > &v2)
Definition: matrix1d.h:1227
void initZeros()
Definition: matrix2d.h:626
doublereal * u
void learnPCABasis(size_t NPCA, size_t Niter)
Learn basis.
Definition: basic_pca.cpp:170
MultidimArray< double > avg
Definition: basic_pca.h:72
void initZeros(const MultidimArray< T1 > &op)
int * n
void indexSort(MultidimArray< int > &indx) const