Xmipp  v3.23.11-Nereus
Public Member Functions | Public Attributes | List of all members

#include <dimred_tools.h>

Collaboration diagram for GenerateData:
Collaboration graph
[legend]

Public Member Functions

void generateNewDataset (const DatasetType &type, int N=1000, double noise=0.05)
 

Public Attributes

Matrix2D< double > X
 
Matrix2D< double > t
 
Matrix1D< unsigned char > label
 

Detailed Description

Definition at line 43 of file dimred_tools.h.

Member Function Documentation

◆ generateNewDataset()

void GenerateData::generateNewDataset ( const DatasetType & type,
int  N = 1000,
double  noise = 0.05 
)

Generate data with a given number of points and a given method. Generates an artificial dataset. Possible datasets are: 'swiss' for the Swiss roll dataset, 'helix' for the helix dataset, 'twinpeaks' for the twinpeaks dataset, '3d_clusters' for the 3D clusters dataset, and 'intersect' for the intersecting dataset. The variable N indicates the number of datapoints to generate. The variable noise indicates the amount of noise that is added to the data. The function generates the high-dimensional dataset in X, and the corresponding labels in label. In addition, the function keeps the coordinates of the datapoints on the underlying manifold in t.

Definition at line 31 of file dimred_tools.cpp.

32 {
34  X.resizeNoCopy(N,3);
35  t.resizeNoCopy(N,2);
36 
37  auto g = std::mt19937();
38  if (0 == noise) {
39  g.seed(42);
40  } else {
41  std::random_device rd;
42  g.seed(rd());
43  }
44 
45  auto distUniform = std::uniform_real_distribution<>(0.0, 1.0);
46  auto distGauss = std::normal_distribution<>(0.0, 1.0);
47  switch (type) {
48  case DatasetType::SWISS:
49  {
50  for (int i=0; i<N; ++i)
51  {
52  // Generate t
53  MAT_ELEM(t,i,0)=(3 * PI / 2) * (1 + 2 * distUniform(g)); // t
54  MAT_ELEM(t,i,1) = 30 * distUniform(g); // height
55  double localT=MAT_ELEM(t,i,0);
56  double localHeight=MAT_ELEM(t,i,1);
57 
58  // Generate X
59  double s,c;
60  //sincos(localT,&s,&c);
61  s = sin(localT);
62  c = cos(localT);
63  MAT_ELEM(X,i,0)=localT * c + noise * distGauss(g);
64  MAT_ELEM(X,i,1)=localHeight + noise * distGauss(g);
65  MAT_ELEM(X,i,2)=localT * s + noise * distGauss(g);
66 
67  // Generate label
68  VEC_ELEM(label,i)=(unsigned char)(round(localT/2)+round(localHeight/12))%2;
69  }
70  break;
71  }
72  case DatasetType::HELIX:
73  {
74  double iN=1.0/N;
75  for (int i=0; i<N; ++i)
76  {
77  // Generate t
78  MAT_ELEM(t,i,0)=2 * PI * i*iN;
79  MAT_ELEM(t,i,1)=30 * distUniform(g); // height
80  double localT=MAT_ELEM(t,i,0);
81 
82  // Generate X
83  double s,c;
84  //sincos(localT,&s,&c);
85  s = sin(localT);
86  c = cos(localT);
87  double s8,c8;
88  //sincos(8*localT,&s8,&c8);
89  s8 = sin(8*localT);
90  c8 = cos(8*localT);
91  MAT_ELEM(X,i,0)=(2 + c8)*c+noise*distGauss(g);
92  MAT_ELEM(X,i,1)=(2 + c8)*s+noise*distGauss(g);
93  MAT_ELEM(X,i,2)=s8+noise*distGauss(g);
94 
95  // Generate label
96  VEC_ELEM(label,i)=(unsigned char)(round(localT * 1.5))%2;
97  }
98  break;
99  }
101  {
102  int actualN=round(sqrt(N));
103  X.resizeNoCopy(actualN*actualN,3);
104  t.resizeNoCopy(actualN*actualN,2);
105  label.resizeNoCopy(actualN*actualN);
106  for (int ii=0; ii<actualN; ii++)
107  {
108  for (int jj=0; jj<actualN; jj++)
109  {
110  int i=ii*actualN+jj;
111 
112  // Generate t
113  double x = 1 - 2 * distUniform(g);
114  double y = 1 - 2 * distUniform(g);
115  MAT_ELEM(t,i,0)=x;
116  MAT_ELEM(t,i,1)=y;
117 
118  // Generate X
119  MAT_ELEM(X,i,0)=x+noise*distGauss(g);
120  MAT_ELEM(X,i,1)=y+noise*distGauss(g);
121  double z=10*sin(PI * x) * tanh(3 * y);
122  MAT_ELEM(X,i,2)=z+noise*distGauss(g);
123 
124  // Generate label
125  VEC_ELEM(label,i)=(unsigned char)(round(0.1*(x+y+z-3)))%2;
126  }
127  }
128  break;
129  }
131  {
132  // Create centers
133  std::vector<Matrix1D<double> > centers;
134  Matrix1D<double> center(3);
135  const int Nclusters=5;
136  for (int i=0; i<Nclusters; i++)
137  {
138  FOR_ALL_ELEMENTS_IN_MATRIX1D(center)
139  VEC_ELEM(center,i)=10*distUniform(g);
140  centers.push_back(center);
141  }
142 
143  // Measure the minimum distance between centers
144  Matrix1D<double> diff;
145  double minDistance=1e38;
146  for (int i=0; i<Nclusters-1; ++i)
147  for (int j=i+1; j<Nclusters; ++j)
148  {
149  diff=centers[i]-centers[j];
150  double distance=diff.module();
151  minDistance=std::min(minDistance,distance);
152  }
153 
154  // Create clusters
155  t.initZeros();
156  double sigma=minDistance/sqrt(12);
157  for (int n=0; n<N; ++n)
158  {
159  int i=(Nclusters*n)/N;
160  const Matrix1D<double> &center=centers[i];
161  MAT_ELEM(X,n,0)=XX(center)+(distUniform(g)-0.5)*sigma+noise*distGauss(g);
162  MAT_ELEM(X,n,1)=YY(center)+(distUniform(g)-0.5)*sigma+noise*distGauss(g);
163  MAT_ELEM(X,n,2)=ZZ(center)+(distUniform(g)-0.5)*sigma+noise*distGauss(g);
164  }
165  break;
166  }
168  {
169  double iN=1.0/N;
170  for (int i=0; i<N; ++i)
171  {
172  // Generate t
173  MAT_ELEM(t,i,0)=2 * PI * i*iN;
174  MAT_ELEM(t,i,1)=5*distUniform(g);
175  double localT=MAT_ELEM(t,i,0);
176  double height=MAT_ELEM(t,i,1);
177 
178  // Generate X
179  double s,c;
180  //sincos(localT,&s,&c);
181  s = sin(localT);
182  c = cos(localT);
183  MAT_ELEM(X,i,0)=c+noise*distGauss(g);
184  MAT_ELEM(X,i,1)=c*s+noise*distGauss(g);
185  MAT_ELEM(X,i,2)=height+noise*distGauss(g);
186 
187  // Generate label
188  VEC_ELEM(label,i)=(unsigned char)(round(localT *0.5)+round(height*0.5))%2;
189  }
190  break;
191  }
192  default:
193  REPORT_ERROR(ERR_ARG_INCORRECT,"Incorrect method passed to generate data");
194  }
195 }
void min(Image< double > &op1, const Image< double > &op2)
#define VEC_ELEM(v, i)
Definition: matrix1d.h:245
double module() const
Definition: matrix1d.h:983
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
doublereal * c
doublereal * g
void sqrt(Image< double > &op)
static double * y
Matrix1D< unsigned char > label
Definition: dimred_tools.h:56
doublereal * x
#define i
void resizeNoCopy(int Ydim, int Xdim)
Definition: matrix2d.h:534
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
#define FOR_ALL_ELEMENTS_IN_MATRIX1D(v)
Definition: matrix1d.h:72
Matrix2D< double > t
Definition: dimred_tools.h:53
#define XX(v)
Definition: matrix1d.h:85
viol type
Incorrect argument received.
Definition: xmipp_error.h:113
Matrix2D< double > X
Definition: dimred_tools.h:48
double z
#define j
#define YY(v)
Definition: matrix1d.h:93
TYPE distance(struct Point_T *p, struct Point_T *q)
Definition: point.cpp:28
int round(double x)
Definition: ap.cpp:7245
void initZeros()
Definition: matrix2d.h:626
void resizeNoCopy(int Xdim)
Definition: matrix1d.h:458
#define PI
Definition: tools.h:43
int * n
#define ZZ(v)
Definition: matrix1d.h:101

Member Data Documentation

◆ label

Matrix1D<unsigned char> GenerateData::label

Vector of labels for the observations

Definition at line 56 of file dimred_tools.h.

◆ t

Matrix2D<double> GenerateData::t

Underlying manifold coordinates. Each row of the matrix corresponds to the manifold coordinates of the observation in X.

Definition at line 53 of file dimred_tools.h.

◆ X

Matrix2D<double> GenerateData::X

Generated data. Each row of the matrix is an individual observation

Definition at line 48 of file dimred_tools.h.


The documentation for this class was generated from the following files: