Xmipp  v3.23.11-Nereus
cuda_xmipp_utils.h
Go to the documentation of this file.
1 
2 #ifndef CUDA_XMIPP_UTILS_H
3 #define CUDA_XMIPP_UTILS_H
4 
5 #include <stdio.h>
6 #include <complex>
7 
// Forward declaration so the prototypes below can take the handle by reference.
8 class myStreamHandle;
// Teardown helpers for opaque handles passed as void*; only the prototypes are
// visible in this header.
9 void mycufftDestroy(void *ptr);
10 void myStreamDestroy(void *ptr);
11 void myStreamCreate(myStreamHandle &myStream);
// Allocation helpers. The d_/h_ parameter prefixes suggest device and host
// memory respectively -- NOTE(review): confirm against the definitions.
12 void gpuMalloc(void** d_data, size_t Nbytes);
13 void gpuFree(void* d_data);
14 void cpuMalloc(void** h_data, size_t Nbytes);
15 void cpuFree(void* h_data);
// Fill helpers for a batch of Ndim matrices; all take float buffers and a stream handle.
16 void initializeIdentity(float* d_data, float *h_data, int Ndim, myStreamHandle &myStream);
17 void setTranslationMatrix(float* d_data, float* posX, float* posY, int Ndim, myStreamHandle &myStream);
18 void setRotationMatrix(float* d_data, float *ang, int Ndim, myStreamHandle &myStream);
// Copy helpers. Judging by the parameter names, the argument order is
// (source, destination, byte count[, stream]).
19 void gpuCopyFromGPUToGPUStream(void* d_dataFrom, void* d_dataTo, size_t Nbytes, myStreamHandle &myStream);
20 void gpuCopyFromCPUToGPUStream(void* data, void* d_data, size_t Nbytes, myStreamHandle &myStream);
21 void gpuCopyFromGPUToCPUStream(void* d_data, void* data, size_t Nbytes, myStreamHandle &myStream);
22 void gpuCopyFromGPUToGPU(void* d_dataFrom, void* d_dataTo, size_t Nbytes);
23 void gpuCopyFromCPUToGPU(void* data, void* d_data, size_t Nbytes);
24 void gpuCopyFromGPUToCPU(void* d_data, void* data, size_t Nbytes);
// Returns a grid extent for `tasks` work items with `Nthreads` per block.
// NOTE(review): presumably a ceiling division -- confirm in the definition.
25 int gridFromBlock(int tasks, int Nthreads);
26 
30 
// Takes `items` elements of type T starting at `data` and returns a T*.
// NOTE(review): presumably the returned pointer is a newly allocated device
// copy that the caller must release -- confirm in the definition.
31 template<typename T>
32 T* loadToGPU(const T* data, size_t items);
33 
// Diagnostic queries that report through the caller-supplied output pointer.
// NOTE(review): the number of elements written to each pointer is not visible here.
34 void cuda_check_gpu_memory(float* data);
35 void cuda_check_gpu_properties(int* maxGridSize);
36 
38 public:
39  void *ptr;
40 
42 
44  ptr=nullptr;
45  }
46 
47  void clear()
48  {
49  if (ptr!=nullptr)
50  mycufftDestroy(ptr);
51  ptr=nullptr;
52  }
53 
54 
55 };
56 
58 public:
59  void *ptr;
60 
62  ptr=nullptr;
63  }
64 
65  void clear()
66  {
67  if(ptr!=nullptr)
68  myStreamDestroy(ptr);
69  }
70 
71 };
72 
// Plain triple of size_t extents (x, y, z) with public members.
class XmippDim3 {
public:
    size_t x;
    size_t y;
    size_t z;

    // Stores the three extents exactly as given.
    XmippDim3(size_t _x, size_t _y, size_t _z)
        : x(_x), y(_y), z(_z)
    {
    }

    // Default construction yields the all-zero triple.
    // The declarator was missing from the listing, which left a bare
    // `{ x=y=z=0; }` block at class scope.
    XmippDim3()
        : x(0), y(0), z(0)
    {
    }
};
91 
// Expands to a dim3 built from the .x, .y and .z members of `d`.
// The argument is parenthesised but evaluated three times, so pass an
// expression without side effects.
92 #define CONVERT2DIM3(d) (dim3((d).x,(d).y,(d).z))
93 
94 
95 template<typename T>
97 {
98 public:
99  size_t Xdim, Ydim, Zdim, Ndim, yxdim, zyxdim, nzyxdim;
100  T* d_data;
101  T* h_data;
102 
104  {
105  Xdim=Ydim=Zdim=Ndim=yxdim=zyxdim=nzyxdim=0;
106  d_data=nullptr;
107  h_data=nullptr;
108  }
109 
110  TransformMatrix(myStreamHandle &myStream, size_t _Ndim, size_t _Xdim=3, size_t _Ydim=3, size_t _Zdim=1)
111  {
112  Xdim=Ydim=Zdim=Ndim=yxdim=zyxdim=nzyxdim=0;
113  d_data=NULL;
114  h_data=NULL;
115  resize(myStream,_Ndim, _Xdim, _Ydim, _Zdim);
116  }
117 
118  template<typename T1>
119  void resize(const TransformMatrix<T1>& array, myStreamHandle &myStream)
120  {
121 
122  resize(myStream, array.Ndim, array.Xdim, array.Ydim, array.Zdim);
123  }
124 
// Extent-based resize; only declared in this header. Defaults describe one
// 3 x 3 x 1 block per entry.
125  void resize(myStreamHandle &myStream, size_t _Ndim, size_t _Xdim=3, size_t _Ydim=3, size_t _Zdim=1);
126 
127  bool isEmpty()
128  {
129  return d_data==NULL && h_data==NULL;
130  }
131 
// Only declared in this header; the definition is not part of this listing.
// NOTE(review): presumably releases d_data/h_data and zeroes the size fields -- confirm.
132  void clear();
133 
135  {
136  clear();
137  }
138 
139  void initialize(myStreamHandle &myStream)
140  {
141  initializeIdentity(d_data, h_data, Ndim, myStream);
142  }
143 
144  void setTranslation(float* posX, float* posY, float *d_out_max, myStreamHandle &myStream)
145  {
146  /*for(int i=0; i<Ndim; i++)
147  setTranslationMatrix(d_data, -posX[i], -posY[i], i);*/
148  setTranslationMatrix(d_data, posX, posY, Ndim, myStream);
149  }
150 
151  void setRotation(float* ang, myStreamHandle &myStream)
152  {
153  /*for(int i=0; i<Ndim; i++)
154  setRotationMatrix(d_data, -ang[i], i);*/
155  setRotationMatrix(d_data, ang, Ndim, myStream);
156  }
157 
158  void copyMatrix(TransformMatrix<float> &lastMatrix, myStreamHandle &myStream)
159  {
160  if (lastMatrix.isEmpty())
161  lastMatrix.resize(myStream, Ndim, 3, 3, 1);
162 
163  gpuCopyFromGPUToGPUStream(d_data, lastMatrix.d_data, nzyxdim*sizeof(float), myStream);
164  }
165 
167  {
168  gpuCopyFromGPUToCPUStream(d_data, h_data, nzyxdim*sizeof(float), myStream);
169  }
170 
171  void copyOneMatrixToCpu(float* &matrixCpu, int idxCpu, int idxGpu, myStreamHandle &myStream)
172  {
173  gpuCopyFromGPUToCPUStream(&d_data[9*idxGpu], &matrixCpu[9*idxCpu], 9*sizeof(float), myStream);
174  }
175 
176 
177 };
178 
179 
180 template<typename T>
182 {
183 public:
184  size_t Xdim, Ydim, Zdim, Ndim, yxdim, zyxdim, nzyxdim;
185  T* d_data;
186 
188  {
189  Xdim=Ydim=Zdim=Ndim=yxdim=zyxdim=nzyxdim=0;
190  d_data=nullptr;
191  }
192 
193  GpuMultidimArrayAtGpu(size_t _Xdim, size_t _Ydim=1, size_t _Zdim=1, size_t _Ndim=1)
194  {
195  Xdim=Ydim=Zdim=Ndim=yxdim=zyxdim=nzyxdim=0;
196  d_data=nullptr;
197  resize(_Xdim, _Ydim, _Zdim, _Ndim);
198  }
199 
200  GpuMultidimArrayAtGpu(size_t _Xdim, size_t _Ydim, size_t _Zdim, size_t _Ndim, T* deviceData)
201  {
202  setDims(_Xdim, _Ydim, _Zdim, _Ndim);
203  d_data = deviceData;
204  }
205 
206  template<typename T1>
208  {
209 
210  resize(array.Xdim, array.Ydim, array.Zdim, array.Ndim);
211  }
212 
// Extent-based resize; only declared in this header. Unspecified extents
// default to 1.
213  void resize(size_t _Xdim, size_t _Ydim=1, size_t _Zdim=1, size_t _Ndim=1);
214 
215  bool isEmpty()
216  {
217  return d_data==nullptr;
218  }
219 
220  void clear()
221  {
222  if (d_data!=nullptr){
223  gpuFree((void*) d_data);
224 
225  }
226  Xdim=Ydim=Zdim=Ndim=yxdim=zyxdim=nzyxdim=0;
227  d_data=nullptr;
228  }
229 
231  {
232  clear();
233  }
234 
235  void copyToGpu(T* data)
236  {
237  gpuCopyFromCPUToGPU((void *)data, (void *)d_data, nzyxdim*sizeof(T));
238  }
239 
240  void copyToCpu(T* data)
241  {
242  gpuCopyFromGPUToCPU((void *)d_data, (void *)data, nzyxdim*sizeof(T));
243  }
244 
245  void copyToGpuStream(T* data, myStreamHandle &myStream)
246  {
247  gpuCopyFromCPUToGPUStream((void *)data, (void *)d_data, nzyxdim*sizeof(T), myStream);
248  }
249 
250  void fillImageToGpu(T* data, size_t n=0)
251  {
252  gpuCopyFromCPUToGPU((void *)data, (void *)&d_data[n*zyxdim], zyxdim*sizeof(T));
253  }
254 
255  void fillImageToGpuStream(T* data, myStreamHandle &myStream, int n=0)
256  {
257  gpuCopyFromCPUToGPUStream((void *)data, (void *)&d_data[n*zyxdim], zyxdim*sizeof(T), myStream);
258  }
259 
261  {
262  if (gpuArray.isEmpty())
263  gpuArray.resize(Xdim,Ydim,Zdim,Ndim);
264 
265  gpuCopyFromGPUToGPU(d_data, gpuArray.d_data, nzyxdim*sizeof(T));
266  }
267 
269  {
270  if (gpuArray.isEmpty())
271  gpuArray.resize(Xdim,Ydim,Zdim,Ndim);
272 
273  gpuCopyFromGPUToGPUStream(d_data, gpuArray.d_data, nzyxdim*sizeof(T), myStream);
274  }
275 
276  void calculateGridSize(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
277  {
278  gridSize.x=gridFromBlock(Xdim,blockSize.x);
279  gridSize.y=gridFromBlock(Ydim,blockSize.y);
280  gridSize.z=gridFromBlock(Zdim,blockSize.z);
281  }
282 
283  void calculateGridSizeVectorized(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
284  {
285  gridSize.x=gridFromBlock(nzyxdim,blockSize.x);
286  gridSize.y=1;
287  gridSize.z=1;
288  }
289 
// Transform entry points; all are only declared in this header.
// Takes the output array and a mycufftHandle by reference.
290  template <typename T1>
291  void fft(GpuMultidimArrayAtGpu<T1> &fourierTransform, mycufftHandle &myhandle);
292 
293  // RealSpace must already be resized
294  template <typename T1>
295  void ifft(GpuMultidimArrayAtGpu<T1> &realSpace, mycufftHandle &myhandle);
296 
// Stream variant of fft(); the output element type is fixed to std::complex<float>.
// NOTE(review): the roles of useCallback and dataRef are not visible here.
297  void fftStream(GpuMultidimArrayAtGpu<std::complex<float>> &fourierTransform, mycufftHandle &myhandle, myStreamHandle &myStream,
298  bool useCallback, GpuMultidimArrayAtGpu< std::complex<float>> &dataRef);
299 
300  // RealSpace must already be resized
// Stream variant of ifft().
// NOTE(review): the roles of useCallback and dataExp are not visible here.
301  template <typename T1>
302  void ifftStream(GpuMultidimArrayAtGpu<T1> &realSpace, mycufftHandle &myhandle, myStreamHandle &myStream,
303  bool useCallback, GpuMultidimArrayAtGpu< std::complex<float> > &dataExp);
304 
305 
// Takes three float output pointers and an int fixPadding argument.
// NOTE(review): buffer sizes and whether the pointers are host or device
// memory are not visible here.
306  void calculateMax(float *max_values, float *posX, float *posY, int fixPadding);
307 
308 
309 private:
310  void setDims(size_t _Xdim, size_t _Ydim=1, size_t _Zdim=1, size_t _Ndim=1) {
311  Xdim=_Xdim;
312  Ydim=_Ydim;
313  Zdim=_Zdim;
314  Ndim=_Ndim;
315  yxdim=(size_t)_Ydim*_Xdim;
316  zyxdim=yxdim*_Zdim;
317  nzyxdim=zyxdim*_Ndim;
318  }
319 };
321 #endif
void resize(const GpuMultidimArrayAtGpu< T1 > &array)
void gpuFree(void *d_data)
T * loadToGPU(const T *data, size_t items)
void gpuCopyFromGPUToGPU(void *d_dataFrom, void *d_dataTo, size_t Nbytes)
void mycufftDestroy(void *ptr)
void fillImageToGpuStream(T *data, myStreamHandle &myStream, int n=0)
void copyGpuToGpu(GpuMultidimArrayAtGpu< T > &gpuArray)
GpuMultidimArrayAtGpu(size_t _Xdim, size_t _Ydim, size_t _Zdim, size_t _Ndim, T *deviceData)
void gpuCopyFromGPUToGPUStream(void *d_dataFrom, void *d_dataTo, size_t Nbytes, myStreamHandle &myStream)
void resize(const TransformMatrix< T1 > &array, myStreamHandle &myStream)
void cpuMalloc(void **h_data, size_t Nbytes)
void copyOneMatrixToCpu(float *&matrixCpu, int idxCpu, int idxGpu, myStreamHandle &myStream)
void setTranslation(float *posX, float *posY, float *d_out_max, myStreamHandle &myStream)
TransformMatrix(myStreamHandle &myStream, size_t _Ndim, size_t _Xdim=3, size_t _Ydim=3, size_t _Zdim=1)
void myStreamCreate(myStreamHandle &myStream)
void myStreamDestroy(void *ptr)
void cuda_check_gpu_memory(float *data)
void calculateGridSize(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
void gpuCopyFromGPUToCPU(void *d_data, void *data, size_t Nbytes)
void gpuCopyFromCPUToGPUStream(void *data, void *d_data, size_t Nbytes, myStreamHandle &myStream)
void cpuFree(void *h_data)
void copyMatrixToCpu(myStreamHandle &myStream)
GpuMultidimArrayAtGpu(size_t _Xdim, size_t _Ydim=1, size_t _Zdim=1, size_t _Ndim=1)
void gpuCopyFromCPUToGPU(void *data, void *d_data, size_t Nbytes)
void calculateGridSizeVectorized(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
void gpuCopyFromGPUToCPUStream(void *d_data, void *data, size_t Nbytes, myStreamHandle &myStream)
void copyToGpuStream(T *data, myStreamHandle &myStream)
XmippDim3(size_t _x, size_t _y, size_t _z)
void fillImageToGpu(T *data, size_t n=0)
void initialize(myStreamHandle &myStream)
void setTranslationMatrix(float *d_data, float *posX, float *posY, int Ndim, myStreamHandle &myStream)
void setRotation(float *ang, myStreamHandle &myStream)
void cuda_check_gpu_properties(int *maxGridSize)
void copyMatrix(TransformMatrix< float > &lastMatrix, myStreamHandle &myStream)
void setRotationMatrix(float *d_data, float *ang, int Ndim, myStreamHandle &myStream)
void initializeIdentity(float *d_data, float *h_data, int Ndim, myStreamHandle &myStream)
void copyGpuToGpuStream(GpuMultidimArrayAtGpu< T > &gpuArray, myStreamHandle &myStream)
int gridFromBlock(int tasks, int Nthreads)
int * n
void gpuMalloc(void **d_data, size_t Nbytes)