Xmipp  v3.23.11-Nereus
matrix_dimred.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * Authors: Carlos Oscar Sorzano (coss@cnb.csic.es)
3  *
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include "matrix_dimred.h"
27 #include <core/matrix2d.h>
28 
30 {
31  algorithm=NULL;
32 }
33 
35 {
36  fnIn = getParam("-i");
37  fnOut = getParam("-o");
38  fnMapping = getParam("--saveMapping");
39  dimRefMethod = getParam("-m");
40  outputDim = getIntParam("--dout");
41  dimEstMethod = getParam("--dout",1);
42 
43  if (dimRefMethod=="LTSA" || dimRefMethod=="LLTSA" || dimRefMethod=="LPP" || dimRefMethod=="LE" || dimRefMethod=="HLLE" ||
44  dimRefMethod=="NPE" || dimRefMethod=="SPE")
45  kNN=getIntParam("-m",1);
46  if (dimRefMethod=="DM" || dimRefMethod=="kPCA")
47  sigma=getDoubleParam("-m",1);
48  if (dimRefMethod=="LPP" || dimRefMethod=="LE")
49  sigma=getDoubleParam("-m",2);
50  if (dimRefMethod=="DM")
51  t=getDoubleParam("-m",2);
52  if (dimRefMethod=="pPCA")
53  Niter=getIntParam("-m",1);
54  if (dimRefMethod=="SPE")
55  global=getIntParam("-m",2)==1;
56 }
57 
58 // Show ====================================================================
60 {
61  if (verbose>0)
62  std::cout
63  << "Input metadata file: " << fnIn << std::endl
64  << "Output metadata: " << fnOut << std::endl
65  << "Output mapping: " << fnMapping << std::endl
66  << "Dim Red Method: " << dimRefMethod << std::endl
67  << "Dimension out: " << outputDim << std::endl
68  ;
69  if (dimRefMethod=="LTSA" || dimRefMethod=="LLTSA" || dimRefMethod=="LPP" || dimRefMethod=="LE" || dimRefMethod=="HLLE" ||
70  dimRefMethod=="SPE" || dimRefMethod=="NPE")
71  std::cout << "k=" << kNN << std::endl;
72  if (dimRefMethod=="DM" || dimRefMethod=="kPCA" || dimRefMethod=="LPP" || dimRefMethod=="LE")
73  std::cout << "sigma=" << sigma << std::endl;
74  if (dimRefMethod=="DM")
75  std::cout << "t=" << t << std::endl;
76  if (dimRefMethod=="pPCA")
77  std::cout << "Niter=" << Niter << std::endl;
78  if (dimRefMethod=="SPE")
79  std::cout << "Global=" << global << std::endl;
80 }
81 
82 // usage ===================================================================
84 {
85  processDefaultComment("-i","-i <input>");
86  processDefaultComment("-o","[-o <output=\"\">]");
87  addParamsLine(" [-m <dimRefMethod=PCA>]: Dimensionality Reduction method selected");
88  addParamsLine(" where <dimRefMethod>");
89  addParamsLine(" PCA : Principal Component Analysis");
90  addParamsLine(" LTSA <k=12> : Local Tangent Space Alignment, k=number of nearest neighbours");
91  addParamsLine(" DM <s=1> <t=1> : Diffusion map, t=Markov random walk, s=kernel sigma");
92  addParamsLine(" LLTSA <k=12> : Linear Local Tangent Space Alignment, k=number of nearest neighbours");
93  addParamsLine(" LPP <k=12> <s=1> : Linearity Preserving Projection, k=number of nearest neighbours, s=kernel sigma");
94  addParamsLine(" kPCA <s=1> : Kernel PCA, s=kernel sigma");
95  addParamsLine(" pPCA <n=200> : Probabilistic PCA, n=number of iterations");
96  addParamsLine(" LE <k=7> <s=1> : Laplacian Eigenmap, k=number of nearest neighbours, s=kernel sigma");
97  addParamsLine(" HLLE <k=12> : Hessian Locally Linear Embedding, k=number of nearest neighbours");
98  addParamsLine(" SPE <k=12> <global=1> : Stochastic Proximity Embedding, k=number of nearest neighbours, global embedding or not");
99  addParamsLine(" NPE <k=12> : Neighborhood Preserving Embedding, k=number of nearest neighbours");
100  addParamsLine(" [--dout <d=2> <method=CorrDim>] : Output dimension. Set to -1 for automatic estimation with a specific method");
101  addParamsLine(" where <method>");
102  addParamsLine(" CorrDim: Correlation dimension");
103  addParamsLine(" MLE: Maximum Likelihood Estimate");
104  addParamsLine(" [--saveMapping <fn=\"\">] : Save mapping if available (PCA, LLTSA, LPP, pPCA, NPE) so that it can be reused later (Y=X*M) (use the flag --more for details)");
105  addParamsLine(" :+Y is the output matrix with individuals as rows, M is the mapping matrix");
106  addParamsLine(" :+X is the input matrix with individuals as rows, with the mean subtracted from the columns");
107  addParamsLine(" :+Python code to generate X:");
108  addParamsLine(" :+ from numpy import loadtxt, mean, outer, ones");
109  addParamsLine(" :+ X_original = loadtxt('data.txt')");
110  addParamsLine(" :+ X = X_original - outer(ones(X_original.shape[0]),mean(X_original, axis=0))");
111 }
112 
113 // Produce Side info ====================================================================
115 {
    // Dispatch on the method name stored in dimRefMethod; there is one branch
    // per method accepted by the -m option.
    // NOTE(review): the statements inside every branch, and those between the
    // end of the chain and the closing brace, are absent from this listing.
    // Presumably each branch selects and configures the matching algorithm
    // object -- confirm against the original source file.
116  if (dimRefMethod=="PCA")
117  {
119  } else if (dimRefMethod=="LTSA")
120  {
123  } else if (dimRefMethod=="DM")
124  {
127  } else if (dimRefMethod=="LLTSA")
128  {
131  } else if (dimRefMethod=="LPP")
132  {
135  } else if (dimRefMethod=="kPCA")
136  {
139  } else if (dimRefMethod=="pPCA")
140  {
143  } else if (dimRefMethod=="LE")
144  {
147  } else if (dimRefMethod=="HLLE")
148  {
151  } else if (dimRefMethod=="NPE")
152  {
155  } else if (dimRefMethod=="SPE")
156  {
159  }
160 
163 }
164 
165 // Estimate dimension
167 {
    // NOTE(review): the statements that precede this point are absent from
    // this listing; presumably they compute outputDim -- confirm against the
    // original source file.
    // Report the dimension on standard output.
170  std::cout << "Estimated dimensionality: " << outputDim << std::endl;
    // A non-positive dimension cannot be used: raise a numerical error.
171  if (outputDim<=0)
172  REPORT_ERROR(ERR_NUMERICAL,"Cannot proceed without an output dimension");
173 }
174 
176 {
    // NOTE(review): one statement before this point is absent from this
    // listing -- confirm against the original source file.
    // Shape of the input matrix as given on the command line.
178  inputDim = getIntParam("--din");
179  Nsamples = getIntParam("--samples");
180 
    // -1 is the value that requests automatic estimation (see the --dout help
    // text), so only an explicitly given output dimension is checked against
    // the input dimension.
181  if (outputDim != -1 && outputDim > inputDim)
182  throw std::invalid_argument("Error, output dimension should be smaller or equal to input dimension");
183 
184 }
185 
187 {
    // Everything in this function is printed only when verbose>0.
188  if (verbose>0)
189  {
    // NOTE(review): one statement at this position is absent from this
    // listing -- confirm against the original source file.
    // The matrix-specific parameters go to the standard error stream.
191  std::cerr
192  << "Dimension in: " << inputDim << std::endl
193  << "Number of samples: " << Nsamples << std::endl
194  ;
195  }
196 }
197 
199 {
    // General description shown in the program help.
200  addUsageLine("This program takes an input matrix, whose rows are individual observations and ");
201  addUsageLine("projects each sample onto a lower dimensional space using the selected method. Note: ");
202  addUsageLine("that each observation component in a row should be separated by white spaces.");
    // Descriptions attached to the -i and -o options.
203  setDefaultComment("-i","Input matrix with data. Each observation is a row.");
204  setDefaultComment("-o","Output matrix with projected data");
    // NOTE(review): one statement at this position is absent from this
    // listing -- confirm against the original source file.
    // Mandatory options describing the shape of the input matrix.
206  addParamsLine(" --din <d> : Input dimension");
207  addParamsLine(" --samples <N> : Number of observations in the input matrix");
208  addExampleLine("xmipp_matrix_dimred -i matrixIn.txt -o matrixOut.txt --din 30 --dout 2 --samples 1000");
209 }
210 
211 // Produce side info ======================================================
213 {
    // NOTE(review): one statement before and one after the two calls below
    // are absent from this listing -- confirm against the original source file.
    // Size X to Nsamples x inputDim (one observation per row, as stated in the
    // usage text), then load its contents from the input file.
215  X.resizeNoCopy(Nsamples,inputDim);
216  X.read(fnIn);
218 }
219 
220 // Run ====================================================================
222 {
    // Print the parameters, then prepare the data needed by the reduction.
223  show();
224  produceSideInfo();
    // A negative output dimension takes a separate path.
    // NOTE(review): the statement controlled by this condition and the
    // remaining statements of the function are absent from this listing --
    // confirm against the original source file.
225  if (outputDim<0)
229 }
DiffusionMaps algorithmDiffusionMaps
Definition: matrix_dimred.h:71
virtual void produceSideInfo()
Produce side info.
ProgDimRed()
Empty constructor.
void readParams()
Read argument from command line.
double getDoubleParam(const char *param, int arg=0)
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
void run()
Main routine.
void setInputData(Matrix2D< double > &X)
Set input data.
void setSpecificParameters(size_t Niters=200)
Set specific parameters.
NPE algorithmNPE
Definition: matrix_dimred.h:78
void setDefaultComment(const char *param, const char *comment)
KernelPCA algorithmKernelPCA
Definition: matrix_dimred.h:74
void processDefaultComment(const char *param, const char *left)
const Matrix2D< double > & getReducedData()
Get reduced data.
String dimEstMethod
Definition: matrix_dimred.h:59
DimRedAlgorithm * algorithm
Definition: matrix_dimred.h:68
FileName fnIn
Definition: matrix_dimred.h:51
void resizeNoCopy(int Ydim, int Xdim)
Definition: matrix2d.h:534
void setSpecificParameters(double sigma=1.0)
Set specific parameters.
Definition: kernelPCA.cpp:9
void setSpecificParameters(int k=12)
Set specific parameters.
Definition: npe.cpp:32
void setSpecificParameters(int kNeighbours=12)
Set specific parameters.
Definition: hessianLLE.cpp:26
const char * getParam(const char *param, int arg=0)
void show()
Show.
virtual void reduceDimensionality()=0
Reduce dimensionality.
LTSA algorithmLTSA
Definition: matrix_dimred.h:70
Error related to numerical calculation.
Definition: xmipp_error.h:179
void addExampleLine(const char *example, bool verbatim=true)
virtual void readParams()
Read argument from command line.
void produceSideInfo()
Produce side info.
int verbose
Verbosity level.
void estimateDimension()
Estimate dimensionality.
LaplacianEigenmap algorithmLaplacianEigenmap
Definition: matrix_dimred.h:76
SPE algorithmSPE
Definition: matrix_dimred.h:79
LLTSA algorithmLLTSA
Definition: matrix_dimred.h:72
void defineParams()
Define parameters.
virtual void defineParams()
Define parameters.
ProbabilisticPCA algorithmProbabilisticPCA
Definition: matrix_dimred.h:75
void setSpecificParameters(double sigma=1.0, size_t numberOfNeighbours=7)
Set specific parameters.
DimRedDistance2 distance
Distance function.
Definition: dimred_tools.h:150
double sigma
Definition: matrix_dimred.h:64
void setSpecificParameters(int k=12, double sigma=1.)
Set specific parameters.
Definition: lpp.cpp:28
FileName fnMapping
Save mapping.
Definition: dimred_tools.h:153
Matrix2D< double > X
Definition: matrix_dimred.h:67
void read(const FileName &fn)
Definition: matrix2d.cpp:101
virtual void show()
Show.
void write(const FileName &fn) const
Definition: matrix2d.cpp:113
PCA algorithmPCA
Definition: matrix_dimred.h:69
FileName fnMapping
Definition: matrix_dimred.h:55
void setOutputDimensionality(size_t outputDim)
Set output dimensionality.
void addUsageLine(const char *line, bool verbatim=false)
void setSpecificParameters(bool global=true, int k=12)
Set specific parameters.
Definition: spe.cpp:30
LPP algorithmLPP
Definition: matrix_dimred.h:73
void setSpecificParameters(int k=12)
Set specific parameters.
Definition: ltsa.cpp:26
int getIntParam(const char *param, int arg=0)
FileName fnOut
Definition: matrix_dimred.h:53
double intrinsicDimensionality(Matrix2D< double > &X, const String &method, bool normalize, DimRedDistance2 f)
void setSpecificParameters(double t=1.0, double sigma=1.0)
Set specific parameters.
String dimRefMethod
Definition: matrix_dimred.h:59
void addParamsLine(const String &line)
HessianLLE algorithmHessianLLE
Definition: matrix_dimred.h:77