Xmipp  v3.23.11-Nereus
pca.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Jorge Garcia de la Nava Ruiz (gdl@ac.uma.es)
4  * Carlos Oscar S. Sorzano (coss@cnb.csic.es)
5  * Alberto Pascual Montano (pascual@cnb.csic.es)
6  *
7  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22  * 02111-1307 USA
23  *
24  * All comments concerning this program package may be sent to the
25  * e-mail address 'xmipp@cnb.csic.es'
26  ***************************************************************************/
27 
28 #include <sstream>
29 
30 #ifdef __sun
31 #include <ieeefp.h>
32 #endif
33 
34 #include "pca.h"
35 
{
    // Convenience overload: run the PCA over the WHOLE training set.
    // Builds the index vector [0, ts.size()) and delegates to the
    // two-argument reset(ts, idx).
    // NOTE(review): the signature line of this overload was lost by the
    // documentation extraction; per the index it is
    // `void PCAAnalyzer::reset(ClassicTrainingVectors const &ts)` — confirm
    // against the repository source.
    std::vector<unsigned> dummy;
    for (unsigned i = 0;i < ts.size();i++)
        dummy.push_back(i);
    reset(ts, dummy);
}
47 
53 void PCAAnalyzer::reset(ClassicTrainingVectors const &ts, std::vector<unsigned> const & idx)
54 {
55  std::vector<FeatureVector> a;
56  int n = ts.dimension();
57  a.resize(n);
58  int verbosity = listener->getVerbosity();
59 
60  {
61  if (verbosity)
62  listener->OnReportOperation((std::string) "Normalizing....\n");
63  if (verbosity == 1)
64  listener->OnInitOperation(n);
65 
66  //Get the mean of the given cluster of vectors
67  for (int k = 0;k < n;k++)
68  {
69  a[k].resize(n);
70  floatFeature sum = 0.0;
71  int l = 0;
72  for (std::vector<unsigned>::const_iterator i = idx.begin();i != idx.end();i++)
73  {
74  if (finite(ts.itemAt(*i)[k]))
75  {
76  sum += ts.itemAt(*i)[k];
77  l++;
78  }
79  }
80  mean.push_back(sum / l);
81  if (verbosity == 1)
82  listener->OnProgress(k);
83  }
84  if (verbosity == 1)
85  listener->OnProgress(n);
86 
87  if (verbosity == 1)
88  listener->OnInitOperation(n);
89  for (int i = 0;i < n;i++)
90  {
91  for (int j = 0;j <= i;j++)
92  {
93  floatFeature sum = 0.0;
94  int l = 0;
95  for (std::vector<unsigned>::const_iterator it = idx.begin();it != idx.end();it++)
96  {
97  floatFeature d1 = ts.itemAt(*it)[i] - mean[i];
98  floatFeature d2 = ts.itemAt(*it)[j] - mean[j];
99  if (finite(d1) && finite(d2))
100  {
101  sum += d1 * d2;
102  l++;
103  }
104  }
105  if (l)
106  a[i][j] = a[j][i] = sum / l;
107  else
108  a[i][j] = a[j][i] = 0;
109  }
110  if (verbosity == 1)
111  listener->OnProgress(i);
112  }
113  if (verbosity == 1)
114  listener->OnProgress(n);
115 
116  // for(int i=0;i<n;i++)
117  // std::cout << a[i] << std::endl;
118  }
119 
120  eigenval.resize(n);
121  eigenvec.resize(n);
122  set_Dimension(n);
123 
125  b.resize(n);
127  z.resize(n);
129  std::vector<FeatureVector> &v = eigenvec;
130 
131  for (int i = 0;i < n;i++)
132  {
133  v[i].resize(n);
134  v[i][i] = 1.0;
135  b[i] = d[i] = a[i][i];
136  }
137 
138  int nrot = 0;
139 
140  if (verbosity)
141  listener->OnReportOperation((std::string) "Diagonalizing matrix....\n");
142  if (verbosity == 1)
143  listener->OnInitOperation(50);
144 
145  //Jacobi method (it=iterationn number)
146  for (int it = 1;it <= 50;it++)
147  {
148  if ((verbosity == 1) && (it == 1))
149  listener->OnProgress(0);
150 
151  floatFeature tresh;
152  floatFeature sm = 0.0;
153  for (int ip = 0; ip < n - 1; ip++)
154  {
155  for (int iq = ip + 1; iq < n; iq++)
156  sm += fabs(a[iq][ip]);
157  }
158  if (sm == 0.0)
159  {//Done. Sort vectors
160  for (int i = 0; i < n - 1; i++)
161  {
162  int k = i;
163  floatFeature p = d[i];
164 
165  for (int j = i + 1; j < n; j++)
166  if (d[j] >= p)
167  p = d[k = j];
168 
169  if (k != i)
170  {//Swap i<->k
171  d[k] = d[i];
172  d[i] = p;
173  FeatureVector t = v[i];
174  v[i] = v[k];
175  v[k] = t;
176  }
177  }
178  if (verbosity == 1)
179  listener->OnProgress(50);
180  return;
181  }
182 
183  if (it < 4)
184  tresh = 0.2 * sm / (n * n);
185  else
186  tresh = 0;
187 
188  for (int ip = 0; ip < n - 1; ip++)
189  {
190  for (int iq = ip + 1; iq < n; iq++)
191  {
192  floatFeature g = 100.0 * fabs(a[iq][ip]);
193 
194  if (it > 4
195  && fabs(d[ip]) + g == fabs(d[ip])
196  && fabs(d[iq]) + g == fabs(d[iq]))
197  a[iq][ip] = 0.0;
198  else if (fabs(a[iq][ip]) > tresh)
199  {
200  floatFeature tau;
201  floatFeature t;
202  floatFeature s;
203  floatFeature c;
204  floatFeature h = d[iq] - d[ip];
205  if (fabs(h) + g == fabs(h))
206  t = a[iq][ip] / h;
207  else
208  {
209  floatFeature theta = 0.5 * h / a[iq][ip];
210  t = 1.0 / (fabs(theta) + sqrt(1.0 + theta * theta));
211  if (theta < 0.0)
212  t = -t;
213  }
214  c = 1.0 / sqrt(1 + t * t);
215  s = t * c;
216  tau = s / (1.0 + c);
217  h = t * a[iq][ip];
218  z[ip] -= h;
219  z[iq] += h;
220  d[ip] -= h;
221  d[iq] += h;
222  a[iq][ip] = 0.0;
223 
224 #define rotate(a,i,j,k,l) \
225  g = a[i][j]; \
226  h = a[k][l]; \
227  a[i][j] = g - s *(h + g*tau); \
228  a[k][l] = h + s*(g - h*tau);
229 
230  int j;
231  for (j = 0; j < ip; j++)
232  {
233  rotate(a, ip, j, iq, j)
234  }
235  for (j = ip + 1; j < iq; j++)
236  {
237  rotate(a, j, ip, iq, j)
238  }
239  for (j = iq + 1; j < n; j++)
240  {
241  rotate(a, j, ip, j, iq)
242  }
243  for (j = 0; j < n; j++)
244  {
245  rotate(v, ip, j, iq, j)
246  }
247 
248  nrot += 1;
249  }//if
250  }//for iq
251  }//for ip
252  for (int ip = 0; ip < n; ip++)
253  {
254  b[ip] += z[ip];
255  d[ip] = b[ip];
256  z[ip] = 0.0;
257  }
258 
259  if (verbosity == 1)
260  listener->OnProgress(it - 1);
261 
262  }//for it
263 
264  if (verbosity == 1)
265  listener->OnProgress(50);
266 
267 
268  REPORT_ERROR(ERR_NUMERICAL, "too many Jacobi iterations");
269 }
270 
271 #ifdef UNUSED // detected as unused 29.6.2018
272 /* Prepare for correlation ------------------------------------------------- */
273 void PCAAnalyzer::prepare_for_correlation()
274 {
275  int nmax = D;
276  int dmax = mean.size();
277 
278  // Initialize
279  prod_ei_mean.resize(nmax);
280  prod_ei_ei.resize(nmax);
281  avg_ei.resize(nmax);
282 
283  // Compute products <ei,ei>, <ei,mean>
284  for (int n = 0; n < nmax; n++)
285  {
286  prod_ei_ei[n] = prod_ei_mean[n] = avg_ei[n] = 0;
287  for (int d = 0; d < dmax; d++)
288  {
289  prod_ei_mean[n] += eigenvec[n][d] * mean[d];
290  prod_ei_ei[n] += eigenvec[n][d] * eigenvec[n][d];
291  avg_ei[n] += eigenvec[n][d];
292  }
293  avg_ei[n] /= dmax;
294  }
295 
296  // Compute product <mean,mean>
297  prod_mean_mean = avg_mean = 0;
298  for (int d = 0; d < dmax; d++)
299  {
300  prod_mean_mean += mean[d] * mean[d];
301  avg_mean += mean[d];
302  }
303  avg_mean /= dmax;
304 }
305 
307 void PCAAnalyzer::setIdentity(int n)
308 {
309  if (n < 0)
310  n = 0;
311  eigenval.resize(n);
312  fill(eigenval.begin(), eigenval.end(), 1.0);
313  eigenvec.resize(n);
314  for (int i = 0;i < n;i++)
315  {
316  eigenvec[i].resize(n);
317  fill(eigenvec[i].begin(), eigenvec[i].end(), 0.0);
318  eigenvec[i][i] = 1.0;
319  }
320 }
321 
322 /* Components for variance ------------------------------------------------- */
323 int PCAAnalyzer::Dimension_for_variance(double th_var)
324 {
325  int imax = eigenval.size();
326  double sum = 0;
327  th_var /= 100;
328  for (int i = 0; i < imax; i++)
329  sum += eigenval[i];
330 
331  double explained = 0;
332  int i = 0;
333  do
334  {
335  explained += eigenval[i++];
336  }
337  while (explained / sum < th_var);
338  return i;
339 }
340 
341 /* Project ----------------------------------------------------------------- */
342 void PCAAnalyzer::Project(FeatureVector &input, FeatureVector &output)
343 {
344  if (input.size() != eigenvec[0].size())
345  REPORT_ERROR(ERR_MULTIDIM_DIM, "PCA_project: vectors are not of the same size");
346 
347  int size = input.size();
348  output.resize(D);
349  for (int i = 0; i < D; i++)
350  {
351  output[i] = 0;
352  // Comput the dot product between the input and the PCA vector[i]
353  for (int j = 0; j < size; j++)
354  output[i] += input[j] * eigenvec[i][j];
355  }
356 }
357 #endif
358 
/* Clear ------------------------------------------------------------------- */
{
    // Reset the analyzer to an empty state: zero relevant dimensions and no
    // mean/eigenvector/eigenvalue data.
    // NOTE(review): the signature line (`void PCAAnalyzer::clear()` per the
    // index) was lost by the documentation extraction — confirm against the
    // repository source.
    set_Dimension(0);
    mean.clear();
    eigenvec.clear();
    eigenval.clear();
}
367 
368 /* Show/read PCA ----------------------------------------------------------- */
369 std::ostream& operator << (std::ostream &out, const PCAAnalyzer &PC)
370 {
371  out << "Relevant Dimension: " << PC.get_Dimension() << std::endl;
372  out << "Mean vector: ";
373  int size = PC.mean.size();
374  out << "(" << size << ") ---> ";
375  for (int j = 0; j < size; j++)
376  out << PC.mean[j] << " ";
377  out << std::endl;
378  for (int i = 0; i < PC.get_Dimension(); i++)
379  {
380  out << PC.eigenval[i] << " (" << size << ") ---> ";
381  for (int j = 0; j < size; j++)
382  out << PC.eigenvec[i][j] << " ";
383  out << std::endl;
384  }
385  return out;
386 }
387 
{
    // Parse the textual format emitted by the matching operator<<:
    //   "Relevant Dimension: D"
    //   "Mean vector: (size) ---> m0 m1 ..."
    //   then D lines of "eigenval (size) ---> v0 v1 ..."
    // NOTE(review): the signature line (the operator>> friend for
    // PCAAnalyzer per the index) was lost by the documentation extraction.
    // NOTE(review): sscanf return values are unchecked — malformed input
    // leaves D/size uninitialized; presumably inputs are always files
    // previously written by operator<< — TODO confirm.
    PC.clear();
    int D;
    std::string read_line;
    getline(in, read_line);
    sscanf(read_line.c_str(), "Relevant Dimension: %d", &D);
    PC.set_Dimension(D);
    PC.eigenval.resize(D);
    PC.eigenvec.resize(D);

    int size;
    getline(in, read_line);
    sscanf(read_line.c_str(), "Mean vector: (%d) --->", &size);
    read_line.erase(0, read_line.find('>') + 1); // remove until --->
    PC.mean.resize(size);
    std::istringstream istr1(read_line.c_str());
    for (int j = 0; j < size; j++)
        istr1 >> PC.mean[j];

    for (int i = 0; i < D; i++)
    {
        getline(in, read_line);
        float f; // eigenvalue parsed as float, then stored as floatFeature
        sscanf(read_line.c_str(), "%f (%d) ---> ", &f, &size);
        PC.eigenval[i] = f;
        read_line.erase(0, read_line.find('>') + 1); // remove until --->
        PC.eigenvec[i].resize(size);
        std::istringstream istr2(read_line.c_str());
        for (int j = 0; j < size; j++)
            istr2 >> PC.eigenvec[i][j];
    }

    return in;
}
423 
424 
425 #ifdef UNUSED // detected as unused 29.6.2018
426 /* Create empty PCA -------------------------------------------------------- */
427 int PCA_set::create_empty_PCA(int n)
428 {
429  int retval = PCA.size();
430  PCA.resize(retval + n);
431  for (int i = 0; i < n; i++)
432  PCA[retval+i] = new PCAAnalyzer;
433  return retval;
434 }
435 #endif
436 
437 /* Show/Read PCAset -------------------------------------------------------- */
438 std::ostream& operator << (std::ostream &out, const PCA_set &PS)
439 {
440  int imax = PS.PCA.size();
441  out << "Number of PCAs: " << imax << std::endl;
442  for (int i = 0; i < imax; i++)
443  out << PS.PCA[i];
444  return out;
445 }
446 
{
    // Read the format written by operator<<(std::ostream&, const PCA_set&):
    // a "Number of PCAs: N" header followed by N serialized PCAAnalyzer
    // records.
    // NOTE(review): the signature line (the operator>> friend for PCA_set)
    // was lost by the documentation extraction.
    // NOTE(review): the sscanf result is unchecked; a malformed header
    // leaves imax uninitialized — TODO confirm inputs are always files
    // written by operator<<.
    int imax;
    std::string read_line;
    getline(in, read_line);
    sscanf(read_line.c_str(), "Number of PCAs: %d\n", &imax);
    PS.PCA.resize(imax);
    for (int i = 0; i < imax; i++)
    {
        in >> PS.PCA[i];
    }
    return in;
}
460 
/* Running PCA constructor ------------------------------------------------- */
{
    // Initialize an incremental (running) PCA that will track _J principal
    // components of _d-dimensional samples; all accumulators start at zero.
    // NOTE(review): the signature line (`Running_PCA::Running_PCA(int _J,
    // int _d)` per the index) was lost by the documentation extraction.
    J = _J;
    d = _d;
    n = 0;                            // samples seen so far
    sum_all_samples.initZeros(d);     // running sum of all samples
    current_sample_mean.initZeros(d); // sum_all_samples / n
    // NOTE(review): sum_proj/sum_proj2 are sized d but indexed per
    // component (j < J) elsewhere — presumably d >= J is assumed; confirm.
    sum_proj.initZeros(d);            // per-component projection sums
    sum_proj2.initZeros(d);           // per-component sums of squared projections
    eigenvectors.initZeros(d, J);     // one eigenvector estimate per column
}
473 
474 #ifdef UNUSED // detected as unused 29.6.2018
/* Update with new sample -------------------------------------------------- */
// Incrementally update the running mean and the J eigenvector estimates
// with one more observation. Each eigenvector j is refined from the sample
// after deflating the components already captured by eigenvectors 0..j-1.
void Running_PCA::new_sample(const Matrix1D<double> &sample)
{
    n++;

    // Re-estimate sample mean
    sum_all_samples += sample;
    current_sample_mean = sum_all_samples / n;

    // Estimate eigenvectors
    Matrix1D<double> un = sample; // working copy, deflated component by component
    un.row = false;               // force column-vector orientation
    for (int j = 0; j < J; j++)
    {
        if (n <= j + 1)
        {
            // If there are not enough samples to estimate this eigenvector, then
            // skip it
            // (bootstrap: use the normalized residual itself as the estimate)
            double norm = un.module();
            if (norm > XMIPP_EQUAL_ACCURACY)
                un /= norm;
            eigenvectors.setCol(j, un);
        }
        else
        {
            // If there are enough samples
            // Subtract the sample mean to have a zero-mean vector
            if (j == 0)
                un -= current_sample_mean;

            // Compute the scale of this vector as the dot product
            // between un and the current eigenvector estimate
            double scale = 0;
            for (int i = 0; i < d; i++)
                scale += un(i) * eigenvectors(i, j);

            // Re-estimate the eigenvector
            // Weighted average of the old estimate (weight (n-1)/n) and the
            // new observation scaled by its projection (weight 1/n * scale).
            double norm2 = 0;
            double w1 = (double)(n - 1.0) / n;
            double w2 = (1.0 - w1) * scale;
            for (int i = 0; i < d; i++)
            {
                eigenvectors(i, j) =
                    w1 * eigenvectors(i, j) +
                    w2 * un(i);
                norm2 += eigenvectors(i, j) *
                         eigenvectors(i, j);
            }

            // Renormalize
            if (norm2 > XMIPP_EQUAL_ACCURACY)
            {
                double norm = sqrt(norm2);
                for (int i = 0; i < d; i++)
                    eigenvectors(i, j) /= norm;
            }

            // Project un onto the space spanned by this eigenvector
            // and remove that component so the next eigenvector sees only
            // the residual (deflation).
            double project = 0;
            for (int i = 0; i < d; i++)
                project += un(i) * eigenvectors(i, j);
            for (int i = 0; i < d; i++)
                un(i) -= project * eigenvectors(i, j);

            // Update the variance of this vector
            sum_proj(j) += project;
            sum_proj2(j) += project * project;
        }
    }
}
545 #endif
546 
/* Project a sample vector on the PCA space -------------------------------- */
// NOTE(review): the first line of this signature
// (`void Running_PCA::project(const Matrix1D<double> &input,` per the
// index) was lost by the documentation extraction.
                   Matrix1D<double> &output) const
{
    // output(j) = <input - current_sample_mean, eigenvector column j>
    // for each of the J estimated principal components.
    output.initZeros(J);
    for (int j = 0; j < J; j++)
        for (int i = 0; i < d; i++)
            output(j) += (input(i) - current_sample_mean(i)) * eigenvectors(i, j);
}
int * nmax
double module() const
Definition: matrix1d.h:983
void set_Dimension(int _D)
Definition: pca.h:130
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
doublereal * c
doublereal * g
void sqrt(Image< double > &op)
virtual void OnReportOperation(const std::string &_rsOp)=0
virtual void OnInitOperation(unsigned long _est_it)=0
virtual const unsigned & getVerbosity() const
Definition: xmipp_funcs.h:1065
float floatFeature
Definition: data_types.h:72
std::vector< double > avg_ei
Definition: pca.h:110
T norm(const std::vector< T > &v)
Definition: vector_ops.h:399
Definition: pca.h:34
double prod_mean_mean
Definition: pca.h:98
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
#define rotate(a, i, j, k, l)
doublereal * d
double theta
void project(const Matrix1D< double > &input, Matrix1D< double > &output) const
Definition: pca.cpp:548
doublereal * b
Running_PCA(int _J, int _d)
Definition: pca.cpp:462
unsigned dimension() const
double avg_mean
Definition: pca.h:107
int in
double * f
#define XMIPP_EQUAL_ACCURACY
Definition: xmipp_macros.h:119
std::vector< PCAAnalyzer > PCA
Definition: pca.h:191
Error related to numerical calculation.
Definition: xmipp_error.h:179
bool row
<0=column vector (default), 1=row vector
Definition: matrix1d.h:267
double z
virtual void OnProgress(unsigned long _it)=0
Definition: pca.h:187
std::vector< double > prod_ei_mean
Definition: pca.h:101
double dummy
void reset(ClassicTrainingVectors const &ts)
Definition: pca.cpp:40
for(j=1;j<=i__1;++j)
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
void initZeros()
Definition: matrix1d.h:592
#define dmax(a, b)
int D
Definition: pca.h:95
#define j
void clear()
Definition: pca.cpp:360
friend std::ostream & operator<<(std::ostream &out, const PCAAnalyzer &PC)
Definition: pca.cpp:369
int get_Dimension() const
Definition: pca.h:136
FeatureVector mean
Definition: pca.h:92
Incorrect MultidimArray dimensions.
Definition: xmipp_error.h:173
std::vector< double > prod_ei_ei
Definition: pca.h:104
friend std::istream & operator>>(std::istream &in, PCAAnalyzer &PC)
Definition: pca.cpp:388
std::vector< floatFeature > FeatureVector
Definition: data_types.h:86
const Item & itemAt(unsigned _i) const
Definition: training_set.h:264
FeatureVector eigenval
Definition: pca.h:87
int * n
doublereal * a
std::vector< FeatureVector > eigenvec
Definition: pca.h:82