Xmipp  v3.23.11-Nereus
Classes | Typedefs | Enumerations | Functions
Tools for dimensionality reduction
Collaboration diagram for Tools for dimensionality reduction:

Classes

class  GenerateData
 
class  DimRedAlgorithm
 

Typedefs

typedef double(* DimRedDistance2) (const Matrix2D< double > &X, size_t i1, size_t i2)
 

Enumerations

enum  DatasetType {
  DatasetType::SWISS, DatasetType::HELIX, DatasetType::TWINPEAKS, DatasetType::CLUSTER3D,
  DatasetType::INTERSECT
}
 

Functions

void computeDistance (const Matrix2D< double > &X, Matrix2D< double > &distance, DimRedDistance2 f=NULL, bool computeSqrt=true)
 
void computeRandomPointsDistance (const Matrix2D< double > &X, Matrix1D< double > &distance, Matrix1D< int > ind1, Matrix1D< int > ind2, DimRedDistance2 f, bool computeSqrt)
 
void computeDistanceToNeighbours (const Matrix2D< double > &X, int K, Matrix2D< double > &distance, DimRedDistance2 f=NULL, bool computeSqrt=true)
 
void computeSimilarityMatrix (Matrix2D< double > &D2, double sigma, bool skipZeros=false, bool normalize=false)
 
void computeGraphLaplacian (const Matrix2D< double > &G, Matrix2D< double > &L)
 
double intrinsicDimensionality (Matrix2D< double > &X, const String &method="MLE", bool normalize=true, DimRedDistance2 f=NULL)
 
void kNearestNeighbours (const Matrix2D< double > &X, int K, Matrix2D< int > &idx, Matrix2D< double > &distance, DimRedDistance2 f=NULL, bool computeSqrt=true)
 
void extractNearestNeighbours (const Matrix2D< double > &X, Matrix2D< int > &idx, int i, Matrix2D< double > &Xi)
 

Detailed Description

Typedef Documentation

◆ DimRedDistance2

typedef double(* DimRedDistance2) (const Matrix2D< double > &X, size_t i1, size_t i2)

Function type to compute the squared distance between individuals i1 and i2 of X

Definition at line 75 of file dimred_tools.h.

Enumeration Type Documentation

◆ DatasetType

enum DatasetType
strong

Generate test data. Translated from drtools/generate_data.m http://homepage.tudelft.nl/19j49/Matlab_Toolbox_for_Dimensionality_Reduction.html

Original code by Laurens van der Maaten, Delft University of Technology

Enumerator
SWISS 
HELIX 
TWINPEAKS 
CLUSTER3D 
INTERSECT 

Definition at line 42 of file dimred_tools.h.

Function Documentation

◆ computeDistance()

void computeDistance ( const Matrix2D< double > &  X,
Matrix2D< double > &  distance,
DimRedDistance2  f = NULL,
bool  computeSqrt = true 
)

Compute the distance of all vs all elements in a matrix of observations. Each observation is a row of the matrix X.

Definition at line 274 of file dimred_tools.cpp.

275 {
276  distance.initZeros(MAT_YSIZE(X),MAT_YSIZE(X));
277  for (size_t i1=0; i1<MAT_YSIZE(X)-1; ++i1)
278  for (size_t i2=i1+1; i2<MAT_YSIZE(X); ++i2)
279  {
280  // Compute the distance between i1 and i2
281  double d=0;
282  if (f==NULL)
283  for (int j=0; j<MAT_XSIZE(X); ++j)
284  {
285  double diff=MAT_ELEM(X,i1,j)-MAT_ELEM(X,i2,j);
286  d+=diff*diff;
287  }
288  else
289  d=(*f)(X,i1,i2);
290 
291  if (computeSqrt)
292  d=sqrt(d);
293  MAT_ELEM(distance,i2,i1)=MAT_ELEM(distance,i1,i2)=d;
294  }
295 }
#define MAT_YSIZE(m)
Definition: matrix2d.h:124
void sqrt(Image< double > &op)
doublereal * d
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
double * f
#define j
#define MAT_XSIZE(m)
Definition: matrix2d.h:120
void initZeros()
Definition: matrix2d.h:626

◆ computeDistanceToNeighbours()

void computeDistanceToNeighbours ( const Matrix2D< double > &  X,
int  K,
Matrix2D< double > &  distance,
DimRedDistance2  f = NULL,
bool  computeSqrt = true 
)

Compute the distance of each observation to its K nearest neighbours. Each observation is a row of the matrix X. If there are N observations, the size of distance is NxN.

Definition at line 297 of file dimred_tools.cpp.

298 {
299  Matrix2D<int> idx;
300  Matrix2D<double> kDistance;
301  kNearestNeighbours(X, K, idx, kDistance, f, computeSqrt);
302  distance.initZeros(MAT_YSIZE(X),MAT_YSIZE(X));
304  {
305  int idx_ij=MAT_ELEM(idx,i,j);
306  MAT_ELEM(distance,idx_ij,i)=MAT_ELEM(distance,i,idx_ij)=MAT_ELEM(kDistance,i,j);
307  }
308 }
#define FOR_ALL_ELEMENTS_IN_MATRIX2D(m)
Definition: matrix2d.h:104
#define MAT_YSIZE(m)
Definition: matrix2d.h:124
void kNearestNeighbours(const Matrix2D< double > &X, int K, Matrix2D< int > &idx, Matrix2D< double > &distance, DimRedDistance2 f, bool computeSqrt)
#define i
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
double * f
#define j
void initZeros()
Definition: matrix2d.h:626
constexpr int K

◆ computeGraphLaplacian()

void computeGraphLaplacian ( const Matrix2D< double > &  G,
Matrix2D< double > &  L 
)

Compute graph laplacian. L=D-G where D is a diagonal matrix with the row sums of G.

Definition at line 329 of file dimred_tools.cpp.

330 {
332  G.rowSum(d);
333  L.resizeNoCopy(G);
335  if (i==j)
336  MAT_ELEM(L,i,i)=VEC_ELEM(d,i)-MAT_ELEM(G,i,i);
337  else
338  MAT_ELEM(L,i,j)=-MAT_ELEM(G,i,j);
339 }
#define FOR_ALL_ELEMENTS_IN_MATRIX2D(m)
Definition: matrix2d.h:104
#define VEC_ELEM(v, i)
Definition: matrix1d.h:245
void rowSum(Matrix1D< T > &sum) const
Definition: matrix2d.cpp:779
#define i
doublereal * d
void resizeNoCopy(int Ydim, int Xdim)
Definition: matrix2d.h:534
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
#define j

◆ computeRandomPointsDistance()

void computeRandomPointsDistance ( const Matrix2D< double > &  X,
Matrix1D< double > &  distance,
Matrix1D< int >  ind1,
Matrix1D< int >  ind2,
DimRedDistance2  f,
bool  computeSqrt 
)

Compute the distance of the elements said by the arrays ind1 and ind2

Definition at line 247 of file dimred_tools.cpp.

248 {
249  distance.initZeros();
250  int ii=0;
251  for (size_t i1=0; i1<VEC_XSIZE(ind1); ++i1)
252  {
253  // Compute the distance between ind1[i] and ind2[i]
254  double d=0;
255  if (f==NULL){
256  for (size_t j=0; j<MAT_XSIZE(X); ++j)
257  {
258  double diff=MAT_ELEM(X,ind1(i1),j)-MAT_ELEM(X,ind2(i1),j);
259  d+=diff*diff;
260  }
261  }
262  else
263  d=(*f)(X,ind1(i1),ind2(i1));
264 
265  if (computeSqrt){
266  d=sqrt(d);
267  }
268 
269  distance(ii) = d;
270  ii++;
271  }
272 }
#define VEC_XSIZE(m)
Definition: matrix1d.h:77
void sqrt(Image< double > &op)
doublereal * d
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
double * f
void initZeros()
Definition: matrix1d.h:592
#define j
TYPE distance(struct Point_T *p, struct Point_T *q)
Definition: point.cpp:28
#define MAT_XSIZE(m)
Definition: matrix2d.h:120

◆ computeSimilarityMatrix()

void computeSimilarityMatrix ( Matrix2D< double > &  D2,
double  sigma,
bool  skipZeros = false,
bool  normalize = false 
)

Compute a similarity matrix from a squared distance matrix. dij=exp(-dij/(2*sigma^2)) The distance matrix can be previously normalized so that the maximum distance is 1

Definition at line 310 of file dimred_tools.cpp.

311 {
312  double maxDistance=1.0;
313  if (normalize)
314  maxDistance=D2.computeMax();
315  double K=-0.5/(sigma*sigma*maxDistance);
316  if (skipZeros)
317  {
319  if (MAT_ELEM(D2,i,j)!=0)
320  MAT_ELEM(D2,i,j)=exp(MAT_ELEM(D2,i,j)*K);
321  }
322  else
323  {
325  MAT_ELEM(D2,i,j)=exp(MAT_ELEM(D2,i,j)*K);
326  }
327 }
#define FOR_ALL_ELEMENTS_IN_MATRIX2D(m)
Definition: matrix2d.h:104
#define i
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
quaternion_type< T > normalize(quaternion_type< T > q)
Definition: point.cpp:278
#define j
constexpr int K
T computeMax() const
Definition: matrix2d.cpp:1236

◆ extractNearestNeighbours()

void extractNearestNeighbours ( const Matrix2D< double > &  X,
Matrix2D< int > &  idx,
int  i,
Matrix2D< double > &  Xi 
)

Extract k-nearest neighbours. This function extracts from the matrix X, the neighbours given by idx for the i-th observation.

Definition at line 450 of file dimred_tools.cpp.

452 {
453  Xi.resizeNoCopy(MAT_XSIZE(idx),MAT_XSIZE(X));
454  for (int j=0; j<MAT_XSIZE(idx); ++j)
455  {
456  int jNeighbour=MAT_ELEM(idx,i,j);
457  memcpy(&MAT_ELEM(Xi,j,0),&MAT_ELEM(X,jNeighbour,0),MAT_XSIZE(X)*sizeof(double));
458  }
459 }
#define i
void resizeNoCopy(int Ydim, int Xdim)
Definition: matrix2d.h:534
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
#define j
#define MAT_XSIZE(m)
Definition: matrix2d.h:120

◆ intrinsicDimensionality()

double intrinsicDimensionality ( Matrix2D< double > &  X,
const String method = "MLE",
bool  normalize = true,
DimRedDistance2  f = NULL 
)

Estimate the intrinsic dimensionality. Performs an estimation of the intrinsic dimensionality of dataset X based on the method specified by method. Possible values for method are 'CorrDim' (based on correlation dimension) and 'MLE' (maximum likelihood estimator). The default method is 'MLE'. All methods are parameterless.

Columns of the input matrix may be normalized to have zero mean and standard deviation 1.

Translated from drtools/intrinsic_dimensionality.m http://homepage.tudelft.nl/19j49/Matlab_Toolbox_for_Dimensionality_Reduction.html

Original code by Laurens van der Maaten, Delft University of Technology

Definition at line 432 of file dimred_tools.cpp.

433 {
434  if (normalize)
435  normalizeColumns(X);
436 
437  if (method=="MLE")
438  return intrinsicDimensionalityMLE(X,f);
439  else if (method=="CorrDim")
441  else
442  REPORT_ERROR(ERR_ARG_INCORRECT,"Unknown dimensionality estimate method");
443 
444  // I do not implement NearNbDim because it gives a wrong answer on the swiss roll
445  // I do not implement PackingNumbers because it is too slow on the swiss roll and gives a wrong answer
446  // I do not implement EigValue because it only provides integer dimensions
447  // I do not implement GMST because it is slower than MLE and CorrDim
448 }
void normalizeColumns(Matrix2D< double > &A)
Definition: matrix2d.cpp:188
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
double intrinsicDimensionalityCorrDim(const Matrix2D< double > &X, DimRedDistance2 f)
double * f
Incorrect argument received.
Definition: xmipp_error.h:113
double intrinsicDimensionalityMLE(const Matrix2D< double > &X, DimRedDistance2 f)
quaternion_type< T > normalize(quaternion_type< T > q)
Definition: point.cpp:278

◆ kNearestNeighbours()

void kNearestNeighbours ( const Matrix2D< double > &  X,
int  K,
Matrix2D< int > &  idx,
Matrix2D< double > &  distance,
DimRedDistance2  f = NULL,
bool  computeSqrt = true 
)

k-Nearest neighbours. Given a data matrix (each row is a sample, each column a variable), this function returns a matrix of the indexes of the K nearest neighbours to each one of the input samples sorted by distance. It also returns the corresponding distance.

The element i,j of the output matrices is the index(distance) of the j-th nearest neighbor to the i-th sample.

You can provide a distance function of your own. If not, Euclidean distance is used.

Definition at line 219 of file dimred_tools.cpp.

220 {
221  K=std::min(K,(int)MAT_YSIZE(X)-1);
222  idx.initConstant(MAT_YSIZE(X),K,-1);
223  distance.initConstant(MAT_YSIZE(X),K,1e38);
224  for (size_t i1=0; i1<MAT_YSIZE(X)-1; ++i1)
225  for (size_t i2=i1+1; i2<MAT_YSIZE(X); ++i2)
226  {
227  // Compute the distance between i1 and i2
228  double d=0;
229  if (f==NULL)
230  for (int j=0; j<MAT_XSIZE(X); ++j)
231  {
232  double diff=MAT_ELEM(X,i1,j)-MAT_ELEM(X,i2,j);
233  d+=diff*diff;
234  }
235  else
236  d=(*f)(X,i1,i2);
237 
238  // Check if they are nearest neighbours
239  insertNeighbour(idx,distance,i1,i2,d);
240  insertNeighbour(idx,distance,i2,i1,d);
241  }
242  if (computeSqrt)
244  MAT_ELEM(distance,i,j)=sqrt(MAT_ELEM(distance,i,j));
245 }
#define FOR_ALL_ELEMENTS_IN_MATRIX2D(m)
Definition: matrix2d.h:104
#define MAT_YSIZE(m)
Definition: matrix2d.h:124
void min(Image< double > &op1, const Image< double > &op2)
void insertNeighbour(Matrix2D< int > &idx, Matrix2D< double > &distance, int i1, int i2, double d)
void initConstant(T val)
Definition: matrix2d.h:602
void sqrt(Image< double > &op)
#define i
doublereal * d
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
double * f
#define j
#define MAT_XSIZE(m)
Definition: matrix2d.h:120
constexpr int K