Xmipp  v3.23.11-Nereus
Public Member Functions | List of all members
AngularAlignmentGpu::AngularSphAlignment Class Reference

#include <cuda_angular_sph_alignment.h>

Public Member Functions

void associateWith (ProgAngularSphAlignmentGpu *prog)
 
void setupConstantParameters ()
 
void setupChangingParameters ()
 
void pretuneKernel ()
 
void runKernel ()
 
void runKernelTest (Matrix1D< double > &clnm, size_t idxY0, double RmaxF2, double iRmaxF, Matrix2D< double > R, const MultidimArray< double > &mV, Matrix1D< double > &steps_cp, Matrix1D< int > &vL1, Matrix1D< int > &vN, Matrix1D< int > &vL2, Matrix1D< int > &vM, MultidimArray< int > &V_mask, MultidimArray< double > &mP)
 
void transferResults ()
 
KernelOutputs getOutputs ()
 
 AngularSphAlignment ()
 
 ~AngularSphAlignment ()
 

Detailed Description

Definition at line 67 of file cuda_angular_sph_alignment.h.

Constructor & Destructor Documentation

◆ AngularSphAlignment()

AngularAlignmentGpu::AngularSphAlignment::AngularSphAlignment ( )

Definition at line 77 of file cuda_angular_sph_alignment.cpp.

78 {
79 }

◆ ~AngularSphAlignment()

AngularAlignmentGpu::AngularSphAlignment::~AngularSphAlignment ( )

Definition at line 81 of file cuda_angular_sph_alignment.cpp.

82 {
83  if (program->useFakeKernel) {
84  } else {
85  cudaFree(dVolData);
86  cudaFree(dRotation);
87  cudaFree(dZshParams);
88  cudaFree(dClnm);
89  cudaFree(dVolMask);
90  cudaFree(dProjectionPlane);
91  }
92 }

Member Function Documentation

◆ associateWith()

void AngularAlignmentGpu::AngularSphAlignment::associateWith ( ProgAngularSphAlignmentGpu prog)

Definition at line 94 of file cuda_angular_sph_alignment.cpp.

95 {
96  program = prog;
97 }
ProgTransformDimRed * prog

◆ getOutputs()

KernelOutputs AngularAlignmentGpu::AngularSphAlignment::getOutputs ( )

Definition at line 190 of file cuda_angular_sph_alignment.cpp.

191 {
192  return outputs;
193 }

◆ pretuneKernel()

void AngularAlignmentGpu::AngularSphAlignment::pretuneKernel ( )

◆ runKernel()

void AngularAlignmentGpu::AngularSphAlignment::runKernel ( )

Definition at line 277 of file cuda_angular_sph_alignment.cpp.

278 {
279  if (program->useFakeKernel) {
280  fakeKernel(
281  Rmax2,
282  iRmax,
283  imageMetaData,
284  dVolData,
285  dRotation,
286  steps,
287  dZshParams,
288  dClnm,
289  dVolMask,
290  dProjectionPlane,
291  &outputs);
292  } else {
293  // Define thrust reduction vector
294  thrust::device_vector<PrecisionType> thrustVec(totalGridSize * 3, 0.0);
295 
296  // TEST make sure everything is ready before kernel starts
297  cudaDeviceSynchronize();
298 
299  // Run kernel
300  projectionKernel<<<grid, block, constantSharedMemSize + changingSharedMemSize>>>(
301  Rmax2,
302  iRmax,
303  imageMetaData,
304  dVolData,
305  dRotation,
306  steps,
307  dZshParams,
308  dClnm,
309  dVolMask,
310  dProjectionPlane,
311  thrust::raw_pointer_cast(thrustVec.data())
312  );
313 
314  cudaDeviceSynchronize();
315 
316  auto countIt = thrustVec.begin();
317  auto sumVDIt = countIt + totalGridSize;
318  auto modgIt = sumVDIt + totalGridSize;
319 
320  outputs.count = thrust::reduce(countIt, sumVDIt);
321  outputs.sumVD = thrust::reduce(sumVDIt, modgIt);
322  outputs.modg = thrust::reduce(modgIt, thrustVec.end());
323  }
324 }
void fakeKernel(PrecisionType Rmax2, PrecisionType iRmax, ImageMetaData volMeta, PrecisionType *volData, PrecisionType *rotation, int steps, int4 *zshparams, PrecisionType3 *clnm, int *Vmask, PrecisionType *projectionPlane, KernelOutputs *outputs)

◆ runKernelTest()

void AngularAlignmentGpu::AngularSphAlignment::runKernelTest ( Matrix1D< double > &  clnm,
size_t  idxY0,
double  RmaxF2,
double  iRmaxF,
Matrix2D< double >  R,
const MultidimArray< double > &  mV,
Matrix1D< double > &  steps_cp,
Matrix1D< int > &  vL1,
Matrix1D< int > &  vN,
Matrix1D< int > &  vL2,
Matrix1D< int > &  vM,
MultidimArray< int > &  V_mask,
MultidimArray< double > &  mP 
)

Definition at line 415 of file cuda_angular_sph_alignment.cpp.

430 {
431  size_t idxZ0=2*idxY0;
432  outputs.sumVD = 0.0;
433  outputs.modg = 0.0;
434  outputs.count = 0.0;
435 
436  Matrix1D<double> pos;
437  pos.initZeros(3);
438 
439  /*
440  std::cout
441  << "clnm: " << clnm[0] << "," << clnm[1] << "," << clnm[2] << "\n"
442  << "Rmax2: " << RmaxF2 << "\n"
443  << "iRmax: " << iRmaxF << "\n"
444  << "Rotation: " << R(0, 0) << "," << R(0, 1) << "," << R(1, 2) << "\n"
445  << "Volume: " << mV(0, 0, 0) << "," << mV(0, 0, 1) << "," << mV(0, 1, 2) << "\n"
446  << "" << std::endl;
447  */
448 
449  for (int k=STARTINGZ(mV); k<=FINISHINGZ(mV); k++) {
450  for (int i=STARTINGY(mV); i<=FINISHINGY(mV); i++) {
451  for (int j=STARTINGX(mV); j<=FINISHINGX(mV); j++) {
452  ZZ(pos) = k; YY(pos) = i; XX(pos) = j;
453  pos = R * pos;
454  //if (k == 10 && i == 10 && j == 10)
455  // std::cout << "pos("<<pos[0]<<","<<pos[1]<<","<<pos[2]<<")" << std::endl;
456  double gx=0.0, gy=0.0, gz=0.0;
457  double k2=ZZ(pos)*ZZ(pos);
458  double kr=ZZ(pos)*iRmaxF;
459  double k2i2=k2+YY(pos)*YY(pos);
460  double ir=YY(pos)*iRmaxF;
461  double r2=k2i2+XX(pos)*XX(pos);
462  double jr=XX(pos)*iRmaxF;
463  double rr=sqrt(r2)*iRmaxF;
464  if (r2<RmaxF2) {
465  for (size_t idx=0; idx<idxY0; idx++) {
466  if (VEC_ELEM(steps_cp,idx) == 1) {
467  double zsph=0.0;
468  int l1 = VEC_ELEM(vL1,idx);
469  int n = VEC_ELEM(vN,idx);
470  int l2 = VEC_ELEM(vL2,idx);
471  int m = VEC_ELEM(vM,idx);
472  zsph=::ZernikeSphericalHarmonics(l1,n,l2,m,jr,ir,kr,rr);
473  if (rr>0 || l2==0) {
474  gx += VEC_ELEM(clnm,idx) *(zsph);
475  gy += VEC_ELEM(clnm,idx+idxY0) *(zsph);
476  gz += VEC_ELEM(clnm,idx+idxZ0) *(zsph);
477  }
478  }
479  }
480 
481  int k_mask, i_mask, j_mask;
482  int voxelI_mask;
483  k_mask = (int)(ZZ(pos)+gz);
484  i_mask = (int)(YY(pos)+gy);
485  j_mask = (int)(XX(pos)+gx);
486 
487  if (V_mask.outside(k_mask, i_mask, j_mask)) {
488  voxelI_mask = 0;
489  } else {
490  voxelI_mask = A3D_ELEM(V_mask, k_mask, i_mask, j_mask);
491  }
492 
493  if (voxelI_mask == 1) {
494  double voxelI=mV.interpolatedElement3D(XX(pos)+gx,YY(pos)+gy,ZZ(pos)+gz);
495  A2D_ELEM(mP,i,j) += voxelI;
496  outputs.sumVD += voxelI;
497  outputs.modg += gx*gx+gy*gy+gz*gz;
498  outputs.count++;
499  }
500  }
501  }
502  }
503  }
504 }
#define A2D_ELEM(v, i, j)
#define VEC_ELEM(v, i)
Definition: matrix1d.h:245
#define FINISHINGX(v)
void sqrt(Image< double > &op)
#define FINISHINGZ(v)
#define STARTINGX(v)
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
#define STARTINGY(v)
#define A3D_ELEM(V, k, i, j)
#define XX(v)
Definition: matrix1d.h:85
T interpolatedElement3D(double x, double y, double z, T outside_value=(T) 0) const
void initZeros()
Definition: matrix1d.h:592
#define j
#define YY(v)
Definition: matrix1d.h:93
int m
#define FINISHINGY(v)
bool outside(int k, int i, int j) const
double ZernikeSphericalHarmonics(int n, int m, double xr, double yr, double zr, double r)
float r2
#define STARTINGZ(v)
int * n
#define ZZ(v)
Definition: matrix1d.h:101
int ir

◆ setupChangingParameters()

void AngularAlignmentGpu::AngularSphAlignment::setupChangingParameters ( )

Definition at line 136 of file cuda_angular_sph_alignment.cpp.

137 {
138  if (program == nullptr)
139  throw(std::runtime_error("AngularSphAlignment not associated with the program!"));
140 
141  if (program->useFakeKernel) {
142  setupClnmCpu();
143  setupRotationCpu();
144  setupProjectionPlaneCpu();
145  } else {
146  setupClnm();
147  setupRotation();
148  setupProjectionPlane();
149  }
150 
151  steps = program->onesInSteps;
152 
153  changingSharedMemSize = 0;
154  changingSharedMemSize += sizeof(int4) * steps;
155  changingSharedMemSize += sizeof(PrecisionType3) * steps;
156 }

◆ setupConstantParameters()

void AngularAlignmentGpu::AngularSphAlignment::setupConstantParameters ( )

Definition at line 102 of file cuda_angular_sph_alignment.cpp.

103 {
104  if (program == nullptr)
105  throw(std::runtime_error("AngularSphAlignment not associated with the program!"));
106 
107  // kernel arguments
108  this->Rmax2 = program->RmaxDef * program->RmaxDef;
109  this->iRmax = 1.0 / program->RmaxDef;
110  setupImageMetaData(program->V);
111 
112  if (program->useFakeKernel) {
113  setupVolumeDataCpu();
114  setupVolumeMaskCpu();
115  setupZSHparamsCpu();
116  } else {
117  setupVolumeData();
118  setupVolumeMask();
119  setupZSHparams();
120  }
121 
122  // kernel dimension
123  block.x = BLOCK_X_DIM;
124  block.y = BLOCK_Y_DIM;
125  block.z = BLOCK_Z_DIM;
126  grid.x = ((imageMetaData.xDim + block.x - 1) / block.x);
127  grid.y = ((imageMetaData.yDim + block.y - 1) / block.y);
128  grid.z = ((imageMetaData.zDim + block.z - 1) / block.z);
129 
130  totalGridSize = grid.x * grid.y * grid.z;
131 
132  // Dynamic shared memory
133  constantSharedMemSize = 0;
134 }
#define BLOCK_X_DIM
#define BLOCK_Y_DIM
#define BLOCK_Z_DIM

◆ transferResults()

void AngularAlignmentGpu::AngularSphAlignment::transferResults ( )

Definition at line 343 of file cuda_angular_sph_alignment.cpp.

344 {
345  if (program->useFakeKernel) {
346  transferProjectionPlaneCpu();
347  } else {
348  transferProjectionPlane();
349  }
350 }

The documentation for this class was generated from the following files: