Xmipp  v3.23.11-Nereus
Classes | Functions
Cuda GPU Reconstruct Fourier
Collaboration diagram for Cuda GPU Reconstruct Fourier:

Classes

struct  FRecBufferDataGPUWrapper
 

Functions

void allocateWrapper (RecFourierBufferData *buffer, int streamIndex)
 
void releaseWrapper (int streamIndex)
 
void createStreams (int count)
 
void deleteStreams (int count)
 
float * allocateTempVolumeGPU (float *&ptr, int size, int typeSize)
 
void releaseTempVolumeGPU (float *&ptr)
 
void copyTempVolumes (std::complex< float > ***tempVol, float ***tempWeights, float *tempVolGPU, float *tempWeightsGPU, int size)
 
void waitForGPU ()
 
void copyBlobTable (float *blobTableSqrt, int size)
 
void releaseBlobTable ()
 
void pinMemory (RecFourierBufferData *buffer)
 
void unpinMemory (RecFourierBufferData *buffer)
 
void copyConstants (int maxVolIndexX, int maxVolIndexYZ, float blobRadius, float blobAlpha, float iDeltaSqrt, float iw0, float oneOverBessiOrderAlpha)
 
void processBufferGPU (float *tempVolumeGPU, float *tempWeightsGPU, RecFourierBufferData *buffer, float blobRadius, int maxVolIndexYZ, bool useFast, float maxResolutionSqr, int stream, int blobOrder, float blobAlpha)
 

Detailed Description

Function Documentation

◆ allocateTempVolumeGPU()

float* allocateTempVolumeGPU ( float *&  ptr,
int  size,
int  typeSize 
)

Method to allocate 3D array (continuous) of given size^3 Allocated array is cleared (to 0 zero) Blocking operation

Definition at line 347 of file cuda_gpu_reconstruct_fourier.cpp.

347  {
348  size_t bytes = (size_t)size * size * size * typeSize;
349  cudaMalloc((void**)&ptr, bytes);
350  cudaMemset(ptr, 0.f, bytes);
351  gpuErrchk( cudaPeekAtLastError() );
352 
353  return ptr;
354 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
double * f

◆ allocateWrapper()

void allocateWrapper ( RecFourierBufferData buffer,
int  streamIndex 
)

Method will allocate buffer wrapper for given stream 'buffer' is used for size references, i.e. it has to have the same sizes that will be used later, during calculation Blocking operation

Definition at line 1093 of file cuda_gpu_reconstruct_fourier.cpp.

1093  {
1094  wrappers[streamIndex] = new FRecBufferDataGPUWrapper(buffer);
1095  gpuErrchk( cudaPeekAtLastError() );
1096 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
FRecBufferDataGPUWrapper ** wrappers

◆ copyBlobTable()

void copyBlobTable ( float *  blobTableSqrt,
int  size 
)

Method will allocate space at GPU and copy there content of the table Blocking operation

Definition at line 1098 of file cuda_gpu_reconstruct_fourier.cpp.

1098  {
1099  cudaMalloc((void **) &devBlobTableSqrt, blobTableSize*sizeof(float));
1100  cudaMemcpy(devBlobTableSqrt, blobTableSqrt, blobTableSize*sizeof(float), cudaMemcpyHostToDevice);
1101  gpuErrchk( cudaPeekAtLastError() );
1102 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
float * devBlobTableSqrt

◆ copyConstants()

void copyConstants ( int  maxVolIndexX,
int  maxVolIndexYZ,
float  blobRadius,
float  blobAlpha,
float  iDeltaSqrt,
float  iw0,
float  oneOverBessiOrderAlpha 
)

Method will copy constants used for calculation to GPU memory Blocking operation

Definition at line 1113 of file cuda_gpu_reconstruct_fourier.cpp.

1116  {
1117  cudaMemcpyToSymbol(cMaxVolumeIndexX, &maxVolIndexX,sizeof(maxVolIndexX));
1118  cudaMemcpyToSymbol(cMaxVolumeIndexYZ, &maxVolIndexYZ,sizeof(maxVolIndexYZ));
1119  cudaMemcpyToSymbol(cBlobRadius, &blobRadius,sizeof(blobRadius));
1120  cudaMemcpyToSymbol(cBlobAlpha, &blobAlpha,sizeof(blobAlpha));
1121  cudaMemcpyToSymbol(cIw0, &iw0,sizeof(iw0));
1122  cudaMemcpyToSymbol(cIDeltaSqrt, &iDeltaSqrt,sizeof(iDeltaSqrt));
1123  cudaMemcpyToSymbol(cOneOverBessiOrderAlpha, &oneOverBessiOrderAlpha,sizeof(oneOverBessiOrderAlpha));
1124  float oneOverBlobRadiusSqr = 1.f / (blobRadius * blobRadius);
1125  cudaMemcpyToSymbol(cOneOverBlobRadiusSqr, &oneOverBlobRadiusSqr,sizeof(oneOverBlobRadiusSqr));
1126  gpuErrchk( cudaPeekAtLastError() );
1127 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
__device__ __constant__ float cBlobRadius
__device__ __constant__ float cOneOverBlobRadiusSqr
__device__ __constant__ int cMaxVolumeIndexX
__device__ __constant__ int cMaxVolumeIndexYZ
__device__ __constant__ float cIDeltaSqrt
__device__ __constant__ float cBlobAlpha
__device__ __constant__ float cIw0
__device__ __constant__ float cOneOverBessiOrderAlpha

◆ copyTempVolumes()

void copyTempVolumes ( std::complex< float > ***  tempVol,
float ***  tempWeights,
float *  tempVolGPU,
float *  tempWeightsGPU,
int  size 
)

Method will copy continuous 3D arrays (with side of size) from GPU to non-continuous arrays on CPU Blocking operation

Definition at line 356 of file cuda_gpu_reconstruct_fourier.cpp.

358  {
359  for (int z = 0; z < size; z++) {
360  for (int y = 0; y < size; y++) {
361  int index = (z * size * size) + (y * size);
362  cudaMemcpy(tempVol[z][y], &tempVolGPU[2 * index], 2 * size * sizeof(float), cudaMemcpyDeviceToHost);
363  cudaMemcpy(tempWeights[z][y] , &tempWeightsGPU[index], size * sizeof(float), cudaMemcpyDeviceToHost);
364  }
365  }
366  gpuErrchk(cudaPeekAtLastError());
367 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
static double * y
viol index
double z

◆ createStreams()

void createStreams ( int  count)

Allocate 'count' streams on default GPU Blocking operation

Definition at line 1047 of file cuda_gpu_reconstruct_fourier.cpp.

1047  {
1048  wrappers = new FRecBufferDataGPUWrapper*[count];
1049  streams = new cudaStream_t[count];
1050  for (int i = 0; i < count; i++) {
1051  cudaStreamCreate(&streams[i]);
1052  }
1053 }
#define i
FRecBufferDataGPUWrapper ** wrappers
cudaStream_t * streams

◆ deleteStreams()

void deleteStreams ( int  count)

Delete streams allocated on default GPU Blocking operation

Definition at line 1055 of file cuda_gpu_reconstruct_fourier.cpp.

1055  {
1056  for (int i = 0; i < count; i++) {
1057  cudaStreamDestroy(streams[i]);
1058  }
1059  delete[] streams;
1060  delete[] wrappers;
1061 }
#define i
FRecBufferDataGPUWrapper ** wrappers
cudaStream_t * streams

◆ pinMemory()

void pinMemory ( RecFourierBufferData buffer)

Definition at line 1064 of file cuda_gpu_reconstruct_fourier.cpp.

1064  {
1065  if (buffer->hasCTFs) {
1066  GPU::pinMemory(buffer->CTFs, buffer->getMaxByteSize(buffer->CTFs));
1067  GPU::pinMemory(buffer->modulators, buffer->getMaxByteSize(buffer->modulators));
1068  }
1069  if (buffer->hasFFTs) {
1070  GPU::pinMemory(buffer->FFTs, buffer->getMaxByteSize(buffer->FFTs));
1071  } else {
1072  GPU::pinMemory(buffer->paddedImages, buffer->getMaxByteSize(buffer->paddedImages));
1073  }
1074  GPU::pinMemory(buffer->spaces, buffer->getMaxByteSize(buffer->spaces));
1075  GPU::pinMemory(buffer, sizeof(*buffer));
1076 }
static void pinMemory(const void *h_mem, size_t bytes, unsigned int flags=0)
Definition: gpu.cpp:92
RecFourierProjectionTraverseSpace * spaces

◆ processBufferGPU()

void processBufferGPU ( float *  tempVolumeGPU,
float *  tempWeightsGPU,
RecFourierBufferData buffer,
float  blobRadius,
int  maxVolIndexYZ,
bool  useFast,
float  maxResolutionSqr,
int  stream,
int  blobOrder,
float  blobAlpha 
)

Method will copy content of the 'buffer' to GPU and runs the calculation (asynchronously on given stream). Once it returns, 'buffer' object can be reused. See also createStreams(int)

Definition at line 1198 of file cuda_gpu_reconstruct_fourier.cpp.

1201  {
1202  switch (blobOrder) {
1203  case 0:
1204  if (blobAlpha <= 15.0) {
1205  processBufferGPU_<0, true>(tempVolumeGPU, tempWeightsGPU,
1206  buffer,
1207  blobRadius, maxVolIndexYZ, useFast,
1208  maxResolutionSqr,
1209  streamIndex);
1210  } else {
1211  processBufferGPU_<0, false>(tempVolumeGPU, tempWeightsGPU,
1212  buffer,
1213  blobRadius, maxVolIndexYZ, useFast,
1214  maxResolutionSqr,
1215  streamIndex);
1216  }
1217  break;
1218  case 1:
1219  processBufferGPU_<1, false>(tempVolumeGPU, tempWeightsGPU,
1220  buffer,
1221  blobRadius, maxVolIndexYZ, useFast,
1222  maxResolutionSqr,
1223  streamIndex);
1224  break;
1225  case 2:
1226  processBufferGPU_<2, false>(tempVolumeGPU, tempWeightsGPU,
1227  buffer,
1228  blobRadius, maxVolIndexYZ, useFast,
1229  maxResolutionSqr,
1230  streamIndex);
1231  break;
1232  case 3:
1233  processBufferGPU_<3, false>(tempVolumeGPU, tempWeightsGPU,
1234  buffer,
1235  blobRadius, maxVolIndexYZ, useFast,
1236  maxResolutionSqr,
1237  streamIndex);
1238  break;
1239  case 4:
1240  processBufferGPU_<4, false>(tempVolumeGPU, tempWeightsGPU,
1241  buffer,
1242  blobRadius, maxVolIndexYZ, useFast,
1243  maxResolutionSqr,
1244  streamIndex);
1245  break;
1246  default:
1247  REPORT_ERROR(ERR_VALUE_INCORRECT, "m out of range [0..4] in kaiser_value()");
1248  }
1249 }
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
HBITMAP buffer
Definition: svm-toy.cpp:37
Incorrect value received.
Definition: xmipp_error.h:195

◆ releaseBlobTable()

void releaseBlobTable ( )

Method will release all resources allocated for the blob table at GPU Blocking operation

Definition at line 1104 of file cuda_gpu_reconstruct_fourier.cpp.

1104  {
1105  cudaFree(devBlobTableSqrt);
1106  gpuErrchk( cudaPeekAtLastError() );
1107 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
float * devBlobTableSqrt

◆ releaseTempVolumeGPU()

void releaseTempVolumeGPU ( float *&  ptr)

Release memory at GPU Blocking operation

Definition at line 369 of file cuda_gpu_reconstruct_fourier.cpp.

369  {
370  cudaFree(ptr);
371  ptr = NULL;
372  gpuErrchk(cudaPeekAtLastError());
373 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31

◆ releaseWrapper()

void releaseWrapper ( int  streamIndex)

Release all resources allocated with the buffer wrapper, for given stream Blocking operation

Definition at line 1109 of file cuda_gpu_reconstruct_fourier.cpp.

1109  {
1110  delete wrappers[streamIndex];
1111 }
FRecBufferDataGPUWrapper ** wrappers

◆ unpinMemory()

void unpinMemory ( RecFourierBufferData buffer)

Definition at line 1078 of file cuda_gpu_reconstruct_fourier.cpp.

1078  {
1079  if (buffer->hasCTFs) {
1080  GPU::unpinMemory(buffer->CTFs);
1081  GPU::unpinMemory(buffer->modulators);
1082  }
1083  if (buffer->hasFFTs) {
1084  GPU::unpinMemory(buffer->FFTs);
1085  } else {
1086  GPU::unpinMemory(buffer->paddedImages);
1087  }
1088  GPU::unpinMemory(buffer->spaces);
1089  GPU::unpinMemory(buffer);
1090 }
RecFourierProjectionTraverseSpace * spaces
static void unpinMemory(const void *h_mem)
Definition: gpu.cpp:108

◆ waitForGPU()

void waitForGPU ( )

Blocking method. Once returns, all operation on default GPU are done

Definition at line 1042 of file cuda_gpu_reconstruct_fourier.cpp.

1042  {
1043  gpuErrchk( cudaPeekAtLastError() );
1044  gpuErrchk( cudaDeviceSynchronize() );
1045 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31