Xmipp  v3.23.11-Nereus
Classes | Functions | Variables
cuda_xmipp_utils.cpp File Reference
#include "cuda_xmipp_utils.h"
#include "cuda_asserts.h"
#include <cuda_runtime.h>
#include <cufft.h>
#include <cufftXt.h>
#include <cuComplex.h>
#include <nvml.h>
#include <time.h>
#include <sys/time.h>
Include dependency graph for cuda_xmipp_utils.cpp:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  pointwiseMult
 

Functions

void myStreamDestroy (void *ptr)
 
void myStreamCreate (myStreamHandle &myStream)
 
void mycufftDestroy (void *ptr)
 
void calculateFFTPlanSize (mycufftHandle &myhandle)
 
void createPlanFFT (int Xdim, int Ydim, int Ndim, int Zdim, bool forward, cufftHandle *plan)
 
void createPlanFFTStream (int Xdim, int Ydim, int Ndim, int Zdim, bool forward, cufftHandle *plan, myStreamHandle &myStream)
 
void gpuMalloc (void **d_data, size_t Nbytes)
 
void gpuFree (void *d_data)
 
void cpuMalloc (void **h_data, size_t Nbytes)
 
void cpuFree (void *h_data)
 
void initializeIdentity (float *d_data, float *h_data, int Ndim, myStreamHandle &myStream)
 
void setTranslationMatrix (float *d_data, float *posX, float *posY, int Ndim, myStreamHandle &myStream)
 
void setRotationMatrix (float *d_data, float *ang, int Ndim, myStreamHandle &myStream)
 
void gpuCopyFromCPUToGPU (void *data, void *d_data, size_t Nbytes)
 
void gpuCopyFromGPUToCPU (void *d_data, void *data, size_t Nbytes)
 
void gpuCopyFromGPUToGPU (void *d_dataFrom, void *d_dataTo, size_t Nbytes)
 
void gpuCopyFromCPUToGPUStream (void *data, void *d_data, size_t Nbytes, myStreamHandle &myStream)
 
void gpuCopyFromGPUToCPUStream (void *d_data, void *data, size_t Nbytes, myStreamHandle &myStream)
 
void gpuCopyFromGPUToGPUStream (void *d_dataFrom, void *d_dataTo, size_t Nbytes, myStreamHandle &myStream)
 
int gridFromBlock (int tasks, int Nthreads)
 
void cuda_check_gpu_memory (float *data)
 
void cuda_check_gpu_properties (int *grid)
 
__device__ cufftComplex CB_pointwiseMultiplicationComplexKernelLoad (void *dataIn, size_t offset, void *callerInfo, void *sharedPtr)
 
__device__ void CB_pointwiseMultiplicationComplexKernelStore (void *dataOut, size_t offset, cufftComplex element, void *callerInfo, void *sharedPtr)
 
template float * loadToGPU< float > (const float *data, size_t items)
 
template std::complex< float > * loadToGPU< std::complex< float > > (const std::complex< float > *data, size_t items)
 
template<typename T >
T * loadToGPU (const T *data, size_t items)
 

Variables

__device__ cufftCallbackLoadC d_pointwiseMultiplicationComplexKernelLoad = CB_pointwiseMultiplicationComplexKernelLoad
 
__device__ cufftCallbackStoreC d_pointwiseMultiplicationComplexKernelStore = CB_pointwiseMultiplicationComplexKernelStore
 

Function Documentation

◆ calculateFFTPlanSize()

void calculateFFTPlanSize ( mycufftHandle myhandle)

Definition at line 79 of file cuda_xmipp_utils.cpp.

79  {
80  printf("calculateFFTPlanSize myhandle.ptr: %p\n",myhandle.ptr);
81  size_t ws2;
82  cufftHandle *planFptr=(cufftHandle *)myhandle.ptr;
83  cufftGetSize(*planFptr, &ws2);
84  printf("calculateFFTPlanSize size %i\n", (int)ws2);
85 }
int cufftHandle
Definition: cuda_fft.h:41

◆ CB_pointwiseMultiplicationComplexKernelLoad()

__device__ cufftComplex CB_pointwiseMultiplicationComplexKernelLoad ( void *  dataIn,
size_t  offset,
void *  callerInfo,
void *  sharedPtr 
)

Definition at line 347 of file cuda_xmipp_utils.cpp.

349 {
350 
351  //printf("INSIDEEEE IFFT\n");
352  pointwiseMult *myData = (pointwiseMult*)callerInfo;
353 
354  cufftComplex reference = ((cufftComplex*)dataIn)[offset];
355  cufftComplex *mask = (cufftComplex*)myData->data;
356 
357  int normFactor = myData->normFactor;
358  int indexM = offset%normFactor;
359 
360  float factor = 1.0f / normFactor;
361 
362  cufftComplex mulOut = cuCmulf((cuComplex)reference, (cuComplex)mask[indexM]);
363  cufftComplex out;
364  out.x = mulOut.x*factor;
365  out.y = mulOut.y*factor;
366 
367  //if(offset>9000 && offset<9100)
368  // printf("offset %i, mask %f, data %f, mul %f, factor %f\n", offset, mask[indexM].x, reference.x, out.x, factor);
369 
370  return out;
371 }
cufftComplex * data

◆ CB_pointwiseMultiplicationComplexKernelStore()

__device__ void CB_pointwiseMultiplicationComplexKernelStore ( void *  dataOut,
size_t  offset,
cufftComplex  element,
void *  callerInfo,
void *  sharedPtr 
)

Definition at line 376 of file cuda_xmipp_utils.cpp.

378 {
379 
380  pointwiseMult *myData = (pointwiseMult*)callerInfo;
381 
382  cufftComplex *mask = myData->data;
383  int normFactor = myData->normFactor;
384  int indexM = offset%normFactor;
385 
386  float factor = 1.0f / normFactor;
387 
388  cufftComplex mulOut = cuCmulf((cuComplex)element, (cuComplex)mask[indexM]);
389  cufftComplex out;
390  out.x = mulOut.x*factor;
391  out.y = mulOut.y*factor;
392  ((cufftComplex*)dataOut)[offset] = out;
393 
394 }
cufftComplex * data

◆ cpuFree()

void cpuFree ( void *  h_data)

Definition at line 211 of file cuda_xmipp_utils.cpp.

212 {
213  gpuErrchk(cudaFreeHost(h_data));
214 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ cpuMalloc()

void cpuMalloc ( void **  h_data,
size_t  Nbytes 
)

Definition at line 206 of file cuda_xmipp_utils.cpp.

207 {
208  gpuErrchk(cudaMallocHost(h_data, Nbytes));
209 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ createPlanFFT()

void createPlanFFT ( int  Xdim,
int  Ydim,
int  Ndim,
int  Zdim,
bool  forward,
cufftHandle plan 
)

Definition at line 88 of file cuda_xmipp_utils.cpp.

88  {
89 
90  int Xfdim=(Xdim/2)+1;
91 
92  int nr1[] = {Xdim}; // --- Size of the image in real space
93  int nr2[] = {Ydim, Xdim}; // --- Size of the image in real space
94  int nr3[] = {Zdim, Ydim, Xdim}; // --- Size of the image in real space
95 
96  int nf1[] = {Xfdim}; // --- Size of the Fourier transform
97  int nf2[] = {Ydim, Xfdim}; // --- Size of the Fourier transform
98  int nf3[] = {Zdim, Ydim, Xfdim}; // --- Size of the Fourier transform
99  int *nr=NULL, *nf=NULL;
100  int NRANK; // 1D, 2D or 3D FFTs
101  if (Ydim==1 && Zdim==1)
102  {
103  NRANK=1;
104  nr=nr1;
105  nf=nf1;
106  }
107  else if (Zdim==1)
108  {
109  NRANK=2;
110  nr=nr2;
111  nf=nf2;
112  }
113  else
114  {
115  NRANK=3;
116  nr=nr3;
117  nf=nf3;
118  }
119 
120  int rstride = 1; // --- Distance between two successive input/output elements
121  int fstride = 1;
122  int rdist = Xdim*Ydim*Zdim; // --- Distance between batches
123  int fdist = Xfdim*Ydim*Zdim;
124 
125  if(forward){
126  gpuErrchkFFT(cufftPlanMany(plan, NRANK, nr, nr, rstride, rdist, nf, fstride, fdist, CUFFT_R2C, Ndim));
127  }else{
128  gpuErrchkFFT(cufftPlanMany(plan, NRANK, nr, nf, fstride, fdist, nr, rstride, rdist, CUFFT_C2R, Ndim));
129  }
130 
131 }
void nf
#define gpuErrchkFFT(code)
Definition: cuda_asserts.h:32

◆ createPlanFFTStream()

void createPlanFFTStream ( int  Xdim,
int  Ydim,
int  Ndim,
int  Zdim,
bool  forward,
cufftHandle plan,
myStreamHandle myStream 
)

Definition at line 133 of file cuda_xmipp_utils.cpp.

134  {
135 
136  int Xfdim=(Xdim/2)+1;
137 
138  int nr1[] = {Xdim}; // --- Size of the image in real space
139  int nr2[] = {Ydim, Xdim}; // --- Size of the image in real space
140  int nr3[] = {Zdim, Ydim, Xdim}; // --- Size of the image in real space
141 
142  int nf1[] = {Xfdim}; // --- Size of the Fourier transform
143  int nf2[] = {Ydim, Xfdim}; // --- Size of the Fourier transform
144  int nf3[] = {Zdim, Ydim, Xfdim}; // --- Size of the Fourier transform
145  int *nr=NULL, *nf=NULL;
146  int NRANK; // 1D, 2D or 3D FFTs
147  if (Ydim==1 && Zdim==1)
148  {
149  NRANK=1;
150  nr=nr1;
151  nf=nf1;
152  }
153  else if (Zdim==1)
154  {
155  NRANK=2;
156  nr=nr2;
157  nf=nf2;
158  }
159  else
160  {
161  NRANK=3;
162  nr=nr3;
163  nf=nf3;
164  }
165 
166  int rstride = 1; // --- Distance between two successive input/output elements
167  int fstride = 1;
168  int rdist = Xdim*Ydim*Zdim; // --- Distance between batches
169  int fdist = Xfdim*Ydim*Zdim;
170 
171  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
172  if(forward){
173  gpuErrchkFFT(cufftPlanMany(plan, NRANK, nr, nr, rstride, rdist, nf, fstride, fdist, CUFFT_R2C, Ndim));
174  gpuErrchkFFT(cufftSetStream(*plan, *stream));
175  }else{
176  gpuErrchkFFT(cufftPlanMany(plan, NRANK, nr, nf, fstride, fdist, nr, rstride, rdist, CUFFT_C2R, Ndim));
177  gpuErrchkFFT(cufftSetStream(*plan, *stream));
178  }
179 
180 }
void nf
#define gpuErrchkFFT(code)
Definition: cuda_asserts.h:32

◆ gpuCopyFromCPUToGPU()

void gpuCopyFromCPUToGPU ( void *  data,
void *  d_data,
size_t  Nbytes 
)

Definition at line 278 of file cuda_xmipp_utils.cpp.

279 {
280  gpuErrchk(cudaMemcpy(d_data, data, Nbytes, cudaMemcpyHostToDevice));
281 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gpuCopyFromCPUToGPUStream()

void gpuCopyFromCPUToGPUStream ( void *  data,
void *  d_data,
size_t  Nbytes,
myStreamHandle myStream 
)

Definition at line 293 of file cuda_xmipp_utils.cpp.

294 {
295  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
296  gpuErrchk(cudaMemcpyAsync(d_data, data, Nbytes, cudaMemcpyHostToDevice, *stream));
297 
298  //gpuErrchk(cudaStreamSynchronize(*stream));
299 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gpuCopyFromGPUToCPU()

void gpuCopyFromGPUToCPU ( void *  d_data,
void *  data,
size_t  Nbytes 
)

Definition at line 283 of file cuda_xmipp_utils.cpp.

284 {
285  gpuErrchk(cudaMemcpy(data, d_data, Nbytes, cudaMemcpyDeviceToHost));
286 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gpuCopyFromGPUToCPUStream()

void gpuCopyFromGPUToCPUStream ( void *  d_data,
void *  data,
size_t  Nbytes,
myStreamHandle myStream 
)

Definition at line 301 of file cuda_xmipp_utils.cpp.

302 {
303  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
304  gpuErrchk(cudaMemcpyAsync(data, d_data, Nbytes, cudaMemcpyDeviceToHost, *stream));
305 
306  gpuErrchk(cudaStreamSynchronize(*stream));
307  //cudaDeviceSynchronize();
308 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gpuCopyFromGPUToGPU()

void gpuCopyFromGPUToGPU ( void *  d_dataFrom,
void *  d_dataTo,
size_t  Nbytes 
)

Definition at line 288 of file cuda_xmipp_utils.cpp.

289 {
290  gpuErrchk(cudaMemcpy(d_dataTo, d_dataFrom, Nbytes, cudaMemcpyDeviceToDevice));
291 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gpuCopyFromGPUToGPUStream()

void gpuCopyFromGPUToGPUStream ( void *  d_dataFrom,
void *  d_dataTo,
size_t  Nbytes,
myStreamHandle myStream 
)

Definition at line 310 of file cuda_xmipp_utils.cpp.

311 {
312  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
313  gpuErrchk(cudaMemcpyAsync(d_dataTo, d_dataFrom, Nbytes, cudaMemcpyDeviceToDevice, *stream));
314 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gpuFree()

void gpuFree ( void *  d_data)

Definition at line 201 of file cuda_xmipp_utils.cpp.

202 {
203  gpuErrchk(cudaFree(d_data));
204 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gpuMalloc()

void gpuMalloc ( void **  d_data,
size_t  Nbytes 
)

Definition at line 196 of file cuda_xmipp_utils.cpp.

197 {
198  gpuErrchk(cudaMalloc(d_data, Nbytes));
199 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ gridFromBlock()

int gridFromBlock ( int  tasks,
int  Nthreads 
)

Definition at line 316 of file cuda_xmipp_utils.cpp.

317 {
318  int numBlk = tasks/Nthreads;
319  if(tasks%Nthreads>0)
320  numBlk++;
321  return numBlk;
322 }

◆ initializeIdentity()

void initializeIdentity ( float *  d_data,
float *  h_data,
int  Ndim,
myStreamHandle myStream 
)

Definition at line 216 of file cuda_xmipp_utils.cpp.

217 {
218  //float *matrices = new float[Ndim*9];
219  for(int i=0; i<Ndim; i++){
220  float aux_matrix[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
221  int offset=i*9;
222  for (int j=0; j<9; j++)
223  h_data[offset+j] = aux_matrix[j];
224  }
225  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
226  gpuErrchk(cudaMemcpyAsync((void*)d_data, h_data, Ndim*9*sizeof(float), cudaMemcpyHostToDevice, *stream));
227  //delete []matrices;
228 
229 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
#define i
#define j

◆ loadToGPU< float >()

template float* loadToGPU< float > ( const float *  data,
size_t  items 
)

◆ loadToGPU< std::complex< float > >()

template std::complex<float>* loadToGPU< std::complex< float > > ( const std::complex< float > *  data,
size_t  items 
)

◆ mycufftDestroy()

void mycufftDestroy ( void *  ptr)

Definition at line 72 of file cuda_xmipp_utils.cpp.

73 {
74  cufftHandle *planPtr = (cufftHandle *)ptr;
75  cufftDestroy(*planPtr);
76  delete planPtr;
77 }
int cufftHandle
Definition: cuda_fft.h:41

◆ myStreamCreate()

void myStreamCreate ( myStreamHandle myStream)

Definition at line 63 of file cuda_xmipp_utils.cpp.

64 {
65  cudaStream_t *streamPtr = new cudaStream_t;
66  gpuErrchk(cudaStreamCreate(streamPtr));
67  myStream.ptr = (void*)streamPtr;
68  //printf("ptr %p\n", myStream.ptr);
69  //printf("streamPtr %p\n", streamPtr);
70 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ myStreamDestroy()

void myStreamDestroy ( void *  ptr)

Definition at line 57 of file cuda_xmipp_utils.cpp.

58 {
59  cudaStream_t *streamPtr = (cudaStream_t *)ptr;
60  cudaStreamDestroy(*streamPtr);
61 }

◆ setRotationMatrix()

void setRotationMatrix ( float *  d_data,
float *  ang,
int  Ndim,
myStreamHandle myStream 
)

Definition at line 260 of file cuda_xmipp_utils.cpp.

261 {
262 
263  float *rad_vector;
264  gpuErrchk(cudaMallocHost((void**)&rad_vector, sizeof(float)*Ndim*9));
265 
266  for(int i=0; i<Ndim; i++){
267  float rad = (float)(-ang[i]*M_PI/180);
268  float matrix[9] = {cosf(rad), -sinf(rad), 0, sinf(rad), cosf(rad), 0, 0, 0, 1};
269  int offset=i*9;
270  for (int j=0; j<9; j++)
271  rad_vector[offset+j] = matrix[j];
272  }
273  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
274  gpuErrchk(cudaMemcpyAsync((void*)d_data, rad_vector, Ndim*9*sizeof(float), cudaMemcpyHostToDevice, *stream));
275  delete []rad_vector;
276 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
#define i
#define j

◆ setTranslationMatrix()

void setTranslationMatrix ( float *  d_data,
float *  posX,
float *  posY,
int  Ndim,
myStreamHandle myStream 
)

Definition at line 237 of file cuda_xmipp_utils.cpp.

238 {
239  float *matrices;
240  gpuErrchk(cudaMallocHost((void**)&matrices, sizeof(float)*Ndim*9));
241 
242  for(int i=0; i<Ndim; i++){
243  float aux_matrix[9] = {1, 0, -posX[i], 0, 1, -posY[i], 0, 0, 1};
244  int offset=i*9;
245  //memcpy(&matrices[offset], &aux_matrix, 9*sizeof(float));
246  for (int j=0; j<9; j++)
247  matrices[offset+j] = aux_matrix[j];
248  }
249  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
250  gpuErrchk(cudaMemcpyAsync((void*)d_data, matrices, Ndim*9*sizeof(float), cudaMemcpyHostToDevice, *stream));
251  delete []matrices;
252 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
#define i
#define j

Variable Documentation

◆ d_pointwiseMultiplicationComplexKernelLoad

__device__ cufftCallbackLoadC d_pointwiseMultiplicationComplexKernelLoad = CB_pointwiseMultiplicationComplexKernelLoad

Definition at line 372 of file cuda_xmipp_utils.cpp.

◆ d_pointwiseMultiplicationComplexKernelStore

__device__ cufftCallbackStoreC d_pointwiseMultiplicationComplexKernelStore = CB_pointwiseMultiplicationComplexKernelStore

Definition at line 395 of file cuda_xmipp_utils.cpp.