Xmipp  v3.23.11-Nereus
cuda_angular_sph_alignment.cpp
Go to the documentation of this file.
1 // Xmipp includes
2 #include "core/metadata_label.h"
4 #include "core/matrix1d.h"
7 #include "cuda_angular_sph_alignment.cu"
9 // Standard includes
10 #include <iterator>
11 #include <stdexcept>
12 #include <stdio.h>
13 #include <iostream>
14 #include <exception>
15 // Thrust includes
16 #include <thrust/reduce.h>
17 #include <thrust/device_vector.h>
18 
19 
21 
22 // Common functions
23 template<typename T>
24 cudaError cudaMallocAndCopy(T** target, const T* source, size_t numberOfElements, size_t memSize = 0)
25 {
26  size_t elemSize = numberOfElements * sizeof(T);
27  memSize = memSize == 0 ? elemSize : memSize * sizeof(T);
28 
29  cudaError err = cudaSuccess;
30  if ((err = cudaMalloc(target, memSize)) != cudaSuccess) {
31  *target = NULL;
32  return err;
33  }
34 
35  if ((err = cudaMemcpy(*target, source, elemSize, cudaMemcpyHostToDevice)) != cudaSuccess) {
36  cudaFree(*target);
37  *target = NULL;
38  }
39 
40  if (memSize > elemSize) {
41  cudaMemset((*target) + numberOfElements, 0, memSize - elemSize);
42  }
43 
44  return err;
45 }
46 
47 #define processCudaError() (_processCudaError(__FILE__, __LINE__))
48 void _processCudaError(const char* file, int line)
49 {
50  cudaError_t err = cudaGetLastError();
51  if (err != cudaSuccess) {
52  fprintf(stderr, "File: %s: line %d\nCuda error: %s\n", file, line, cudaGetErrorString(err));
53  exit(err);
54  }
55 }
56 
57 // Copies data from CPU to the GPU and at the same time transforms from
58 // type 'U' to type 'T'. Works only for numeric types
59 template<typename Target, typename Source>
60 void transformData(Target** dest, Source* source, size_t n, bool mallocMem = true)
61 {
62  std::vector<Target> tmp(source, source + n);
63 
64  if (mallocMem){
65  if (cudaMalloc(dest, sizeof(Target) * n) != cudaSuccess){
67  }
68  }
69 
70  if (cudaMemcpy(*dest, tmp.data(), sizeof(Target) * n, cudaMemcpyHostToDevice) != cudaSuccess){
72  }
73 }
74 
75 // AngularSphAlignment methods
76 
78 {
79 }
80 
82 {
83  if (program->useFakeKernel) {
84  } else {
85  cudaFree(dVolData);
86  cudaFree(dRotation);
87  cudaFree(dZshParams);
88  cudaFree(dClnm);
89  cudaFree(dVolMask);
90  cudaFree(dProjectionPlane);
91  }
92 }
93 
95 {
96  program = prog;
97 }
98 
99 static dim3 grid;
100 static dim3 block;
101 
103 {
104  if (program == nullptr)
105  throw(std::runtime_error("AngularSphAlignment not associated with the program!"));
106 
107  // kernel arguments
108  this->Rmax2 = program->RmaxDef * program->RmaxDef;
109  this->iRmax = 1.0 / program->RmaxDef;
110  setupImageMetaData(program->V);
111 
112  if (program->useFakeKernel) {
113  setupVolumeDataCpu();
114  setupVolumeMaskCpu();
115  setupZSHparamsCpu();
116  } else {
117  setupVolumeData();
118  setupVolumeMask();
119  setupZSHparams();
120  }
121 
122  // kernel dimension
123  block.x = BLOCK_X_DIM;
124  block.y = BLOCK_Y_DIM;
125  block.z = BLOCK_Z_DIM;
126  grid.x = ((imageMetaData.xDim + block.x - 1) / block.x);
127  grid.y = ((imageMetaData.yDim + block.y - 1) / block.y);
128  grid.z = ((imageMetaData.zDim + block.z - 1) / block.z);
129 
130  totalGridSize = grid.x * grid.y * grid.z;
131 
132  // Dynamic shared memory
133  constantSharedMemSize = 0;
134 }
135 
137 {
138  if (program == nullptr)
139  throw(std::runtime_error("AngularSphAlignment not associated with the program!"));
140 
141  if (program->useFakeKernel) {
142  setupClnmCpu();
143  setupRotationCpu();
144  setupProjectionPlaneCpu();
145  } else {
146  setupClnm();
147  setupRotation();
148  setupProjectionPlane();
149  }
150 
151  steps = program->onesInSteps;
152 
153  changingSharedMemSize = 0;
154  changingSharedMemSize += sizeof(int4) * steps;
155  changingSharedMemSize += sizeof(PrecisionType3) * steps;
156 }
157 
158 void AngularSphAlignment::setupClnm()
159 {
160  clnmVec.resize(program->vL1.size());
161 
162  for (unsigned i = 0; i < program->vL1.size(); ++i) {
163  clnmVec[i].x = program->clnm[i];
164  clnmVec[i].y = program->clnm[i + program->vL1.size()];
165  clnmVec[i].z = program->clnm[i + program->vL1.size() * 2];
166  }
167 
168  if (dClnm == nullptr) {
169  if (cudaMallocAndCopy(&dClnm, clnmVec.data(), clnmVec.size()) != cudaSuccess)
171  } else {
172  if (cudaMemcpy(dClnm, clnmVec.data(), clnmVec.size() * sizeof(PrecisionType3),
173  cudaMemcpyHostToDevice) != cudaSuccess)
175  }
176 }
177 
178 void AngularSphAlignment::setupClnmCpu()
179 {
180  clnmVec.resize(program->vL1.size());
181 
182  for (unsigned i = 0; i < program->vL1.size(); ++i) {
183  clnmVec[i].x = program->clnm[i];
184  clnmVec[i].y = program->clnm[i + program->vL1.size()];
185  clnmVec[i].z = program->clnm[i + program->vL1.size() * 2];
186  }
187  dClnm = clnmVec.data();
188 }
189 
191 {
192  return outputs;
193 }
194 
195 void AngularSphAlignment::transferImageData(Image<double>& outputImage, PrecisionType* inputData)
196 {
197  size_t elements = imageMetaData.xDim * imageMetaData.yDim * imageMetaData.zDim;
198  std::vector<PrecisionType> tVec(elements);
199  cudaMemcpy(tVec.data(), inputData, sizeof(PrecisionType) * elements, cudaMemcpyDeviceToHost);
200  std::vector<double> dVec(tVec.begin(), tVec.end());
201  memcpy(outputImage().data, dVec.data(), sizeof(double) * elements);
202 }
203 
204 void AngularSphAlignment::setupVolumeData()
205 {
206  const auto& vol = program->V();
207  transformData(&dVolData, vol.data, vol.zyxdim, dVolData == nullptr);
208 }
209 
210 void AngularSphAlignment::setupVolumeDataCpu()
211 {
212  const auto& vol = program->V();
213  volDataVec.assign(vol.data, vol.data + vol.zyxdim);
214  dVolData = volDataVec.data();
215 }
216 
217 void AngularSphAlignment::setupRotation()
218 {
219  transformData(&dRotation, program->R.mdata, program->R.mdim, dRotation == nullptr);
220 }
221 
222 void AngularSphAlignment::setupRotationCpu()
223 {
224  rotationVec.assign(program->R.mdata, program->R.mdata + program->R.mdim);
225  dRotation = rotationVec.data();
226 }
227 
228 void AngularSphAlignment::setupVolumeMask()
229 {
230  if (dVolMask == nullptr) {
231  if (cudaMallocAndCopy(&dVolMask, program->V_mask.data, program->V_mask.getSize())
232  != cudaSuccess)
234  } else {
235  if (cudaMemcpy(dVolMask, program->V_mask.data, program->V_mask.getSize() * sizeof(int),
236  cudaMemcpyHostToDevice) != cudaSuccess)
238  }
239 }
240 
241 void AngularSphAlignment::setupVolumeMaskCpu()
242 {
243  dVolMask = program->V_mask.data;
244 }
245 
246 void AngularSphAlignment::setupProjectionPlane()
247 {
248  const auto& projPlane = program->P();
249  if (dProjectionPlane == nullptr) {
250  if (cudaMalloc(&dProjectionPlane, projPlane.yxdim * sizeof(PrecisionType)) != cudaSuccess)
252  }
253  cudaMemset(dProjectionPlane, 0, projPlane.yxdim * sizeof(PrecisionType));
254 }
255 
256 void AngularSphAlignment::setupProjectionPlaneCpu()
257 {
258  const auto& projPlane = program->P();
259  projectionPlaneVec.assign(projPlane.data, projPlane.data + projPlane.yxdim);
260  dProjectionPlane = projectionPlaneVec.data();
261 }
262 
263 void fakeKernel(
264  PrecisionType Rmax2,
265  PrecisionType iRmax,
266  ImageMetaData volMeta,
267  PrecisionType* volData,
268  PrecisionType* rotation,
269  int steps,
270  int4* zshparams,
271  PrecisionType3* clnm,
272  int* Vmask,
273  PrecisionType* projectionPlane,
274  KernelOutputs* outputs
275  );
276 
278 {
279  if (program->useFakeKernel) {
280  fakeKernel(
281  Rmax2,
282  iRmax,
283  imageMetaData,
284  dVolData,
285  dRotation,
286  steps,
287  dZshParams,
288  dClnm,
289  dVolMask,
290  dProjectionPlane,
291  &outputs);
292  } else {
293  // Define thrust reduction vector
294  thrust::device_vector<PrecisionType> thrustVec(totalGridSize * 3, 0.0);
295 
296  // TEST make sure everything is ready before kernel starts
297  cudaDeviceSynchronize();
298 
299  // Run kernel
300  projectionKernel<<<grid, block, constantSharedMemSize + changingSharedMemSize>>>(
301  Rmax2,
302  iRmax,
303  imageMetaData,
304  dVolData,
305  dRotation,
306  steps,
307  dZshParams,
308  dClnm,
309  dVolMask,
310  dProjectionPlane,
311  thrust::raw_pointer_cast(thrustVec.data())
312  );
313 
314  cudaDeviceSynchronize();
315 
316  auto countIt = thrustVec.begin();
317  auto sumVDIt = countIt + totalGridSize;
318  auto modgIt = sumVDIt + totalGridSize;
319 
320  outputs.count = thrust::reduce(countIt, sumVDIt);
321  outputs.sumVD = thrust::reduce(sumVDIt, modgIt);
322  outputs.modg = thrust::reduce(modgIt, thrustVec.end());
323  }
324 }
325 
326 void AngularSphAlignment::transferProjectionPlane()
327 {
328  // mozna lepsi nez neustale pretypovavat a kopirovat vectory, to proste ukladat v double na GPU
329  // nic se tam nepocita jen se to ulozi (tzn "jedno" pretypovani z float na double)
330  std::vector<PrecisionType> tmp(program->P().zyxdim);
331  cudaMemcpy(tmp.data(), dProjectionPlane, tmp.size() * sizeof(PrecisionType),
332  cudaMemcpyDeviceToHost);
333  std::vector<double> tmpDouble(tmp.begin(), tmp.end());
334  memcpy(program->P().data, tmpDouble.data(), tmpDouble.size() * sizeof(double));
335 }
336 
337 void AngularSphAlignment::transferProjectionPlaneCpu()
338 {
339  std::vector<double> tmp(projectionPlaneVec.begin(), projectionPlaneVec.end());
340  memcpy(program->P().data, tmp.data(), tmp.size() * sizeof(double));
341 }
342 
344 {
345  if (program->useFakeKernel) {
346  transferProjectionPlaneCpu();
347  } else {
348  transferProjectionPlane();
349  }
350 }
351 
352 void AngularSphAlignment::setupZSHparams()
353 {
354  zshparamsVec.resize(program->vL1.size());
355 
356  for (unsigned i = 0; i < zshparamsVec.size(); ++i) {
357  zshparamsVec[i].w = program->vL1[i];
358  zshparamsVec[i].x = program->vN[i];
359  zshparamsVec[i].y = program->vL2[i];
360  zshparamsVec[i].z = program->vM[i];
361  }
362 
363  if (dZshParams == nullptr) {
364  if (cudaMallocAndCopy(&dZshParams, zshparamsVec.data(), zshparamsVec.size()) != cudaSuccess)
366  } else {
367  if (cudaMemcpy(dZshParams, zshparamsVec.data(), zshparamsVec.size() * sizeof(int4),
368  cudaMemcpyHostToDevice) != cudaSuccess)
370  }
371 }
372 
373 void AngularSphAlignment::setupZSHparamsCpu()
374 {
375  zshparamsVec.resize(program->vL1.size());
376 
377  for (unsigned i = 0; i < zshparamsVec.size(); ++i) {
378  zshparamsVec[i].w = program->vL1[i];
379  zshparamsVec[i].x = program->vN[i];
380  zshparamsVec[i].y = program->vL2[i];
381  zshparamsVec[i].z = program->vM[i];
382  }
383  dZshParams = zshparamsVec.data();
384 }
385 
386 void setupImageNew(Image<double>& inputImage, PrecisionType** outputImageData)
387 {
388  auto& mda = inputImage();
389  transformData(outputImageData, mda.data, mda.xdim * mda.ydim * mda.zdim);
390 }
391 
392 void AngularSphAlignment::setupImageMetaData(const Image<double>& mda)
393 {
394  imageMetaData.xShift = mda().xinit;
395  imageMetaData.yShift = mda().yinit;
396  imageMetaData.zShift = mda().zinit;
397  imageMetaData.xDim = mda().xdim;
398  imageMetaData.yDim = mda().ydim;
399  imageMetaData.zDim = mda().zdim;
400 }
401 
402 void AngularSphAlignment::setupImage(Image<double>& inputImage, PrecisionType** outputImageData)
403 {
404  auto& mda = inputImage();
405  transformData(outputImageData, mda.data, mda.xdim * mda.ydim * mda.zdim);
406 }
407 
408 void AngularSphAlignment::setupImage(const ImageMetaData& inputImage, PrecisionType** outputImageData)
409 {
410  size_t size = inputImage.xDim * inputImage.yDim * inputImage.zDim * sizeof(PrecisionType);
411  if (cudaMalloc(outputImageData, size) != cudaSuccess)
413 }
414 
416  Matrix1D<double>& clnm,
417  size_t idxY0,
418  double RmaxF2,
419  double iRmaxF,
421  const MultidimArray<double>& mV,
422  Matrix1D<double>& steps_cp,
423  Matrix1D<int>& vL1,
424  Matrix1D<int>& vN,
425  Matrix1D<int>& vL2,
426  Matrix1D<int>& vM,
427  MultidimArray<int>& V_mask,
429  )
430 {
431  size_t idxZ0=2*idxY0;
432  outputs.sumVD = 0.0;
433  outputs.modg = 0.0;
434  outputs.count = 0.0;
435 
436  Matrix1D<double> pos;
437  pos.initZeros(3);
438 
439  /*
440  std::cout
441  << "clnm: " << clnm[0] << "," << clnm[1] << "," << clnm[2] << "\n"
442  << "Rmax2: " << RmaxF2 << "\n"
443  << "iRmax: " << iRmaxF << "\n"
444  << "Rotation: " << R(0, 0) << "," << R(0, 1) << "," << R(1, 2) << "\n"
445  << "Volume: " << mV(0, 0, 0) << "," << mV(0, 0, 1) << "," << mV(0, 1, 2) << "\n"
446  << "" << std::endl;
447  */
448 
449  for (int k=STARTINGZ(mV); k<=FINISHINGZ(mV); k++) {
450  for (int i=STARTINGY(mV); i<=FINISHINGY(mV); i++) {
451  for (int j=STARTINGX(mV); j<=FINISHINGX(mV); j++) {
452  ZZ(pos) = k; YY(pos) = i; XX(pos) = j;
453  pos = R * pos;
454  //if (k == 10 && i == 10 && j == 10)
455  // std::cout << "pos("<<pos[0]<<","<<pos[1]<<","<<pos[2]<<")" << std::endl;
456  double gx=0.0, gy=0.0, gz=0.0;
457  double k2=ZZ(pos)*ZZ(pos);
458  double kr=ZZ(pos)*iRmaxF;
459  double k2i2=k2+YY(pos)*YY(pos);
460  double ir=YY(pos)*iRmaxF;
461  double r2=k2i2+XX(pos)*XX(pos);
462  double jr=XX(pos)*iRmaxF;
463  double rr=sqrt(r2)*iRmaxF;
464  if (r2<RmaxF2) {
465  for (size_t idx=0; idx<idxY0; idx++) {
466  if (VEC_ELEM(steps_cp,idx) == 1) {
467  double zsph=0.0;
468  int l1 = VEC_ELEM(vL1,idx);
469  int n = VEC_ELEM(vN,idx);
470  int l2 = VEC_ELEM(vL2,idx);
471  int m = VEC_ELEM(vM,idx);
472  zsph=::ZernikeSphericalHarmonics(l1,n,l2,m,jr,ir,kr,rr);
473  if (rr>0 || l2==0) {
474  gx += VEC_ELEM(clnm,idx) *(zsph);
475  gy += VEC_ELEM(clnm,idx+idxY0) *(zsph);
476  gz += VEC_ELEM(clnm,idx+idxZ0) *(zsph);
477  }
478  }
479  }
480 
481  int k_mask, i_mask, j_mask;
482  int voxelI_mask;
483  k_mask = (int)(ZZ(pos)+gz);
484  i_mask = (int)(YY(pos)+gy);
485  j_mask = (int)(XX(pos)+gx);
486 
487  if (V_mask.outside(k_mask, i_mask, j_mask)) {
488  voxelI_mask = 0;
489  } else {
490  voxelI_mask = A3D_ELEM(V_mask, k_mask, i_mask, j_mask);
491  }
492 
493  if (voxelI_mask == 1) {
494  double voxelI=mV.interpolatedElement3D(XX(pos)+gx,YY(pos)+gy,ZZ(pos)+gz);
495  A2D_ELEM(mP,i,j) += voxelI;
496  outputs.sumVD += voxelI;
497  outputs.modg += gx*gx+gy*gy+gz*gz;
498  outputs.count++;
499  }
500  }
501  }
502  }
503  }
504 }
505 
506 void rotate(PrecisionType* pos, PrecisionType* rotation)
507 {
508  PrecisionType tmp[3] = {0};
509 
510  for (size_t i = 0; i < 3; i++)
511  for (size_t j = 0; j < 3; j++)
512  tmp[i] += rotation[3 * i + j] * pos[j];
513 
514  pos[0] = tmp[0];
515  pos[1] = tmp[1];
516  pos[2] = tmp[2];
517 }
518 
521  PrecisionType outside_value = 0)
522 {
523  int x0 = FLOOR(x);
524  PrecisionType fx = x - x0;
525  int x1 = x0 + 1;
526 
527  int y0 = FLOOR(y);
528  PrecisionType fy = y - y0;
529  int y1 = y0 + 1;
530 
531  int z0 = FLOOR(z);
532  PrecisionType fz = z - z0;
533  int z1 = z0 + 1;
534 
535  PrecisionType d000 = (IS_OUTSIDE(ImD, z0, y0, x0)) ?
536  outside_value : ELEM_3D_SHIFTED(data, ImD, z0, y0, x0);
537  PrecisionType d001 = (IS_OUTSIDE(ImD, z0, y0, x1)) ?
538  outside_value : ELEM_3D_SHIFTED(data, ImD, z0, y0, x1);
539  PrecisionType d010 = (IS_OUTSIDE(ImD, z0, y1, x0)) ?
540  outside_value : ELEM_3D_SHIFTED(data, ImD, z0, y1, x0);
541  PrecisionType d011 = (IS_OUTSIDE(ImD, z0, y1, x1)) ?
542  outside_value : ELEM_3D_SHIFTED(data, ImD, z0, y1, x1);
543  PrecisionType d100 = (IS_OUTSIDE(ImD, z1, y0, x0)) ?
544  outside_value : ELEM_3D_SHIFTED(data, ImD, z1, y0, x0);
545  PrecisionType d101 = (IS_OUTSIDE(ImD, z1, y0, x1)) ?
546  outside_value : ELEM_3D_SHIFTED(data, ImD, z1, y0, x1);
547  PrecisionType d110 = (IS_OUTSIDE(ImD, z1, y1, x0)) ?
548  outside_value : ELEM_3D_SHIFTED(data, ImD, z1, y1, x0);
549  PrecisionType d111 = (IS_OUTSIDE(ImD, z1, y1, x1)) ?
550  outside_value : ELEM_3D_SHIFTED(data, ImD, z1, y1, x1);
551 
552  PrecisionType dx00 = LIN_INTERP(fx, d000, d001);
553  PrecisionType dx01 = LIN_INTERP(fx, d100, d101);
554  PrecisionType dx10 = LIN_INTERP(fx, d010, d011);
555  PrecisionType dx11 = LIN_INTERP(fx, d110, d111);
556  PrecisionType dxy0 = LIN_INTERP(fy, dx00, dx10);
557  PrecisionType dxy1 = LIN_INTERP(fy, dx01, dx11);
558 
559  return LIN_INTERP(fz, dxy0, dxy1);
560 }
561 
563  PrecisionType Rmax2,
564  PrecisionType iRmax,
565  ImageMetaData volMeta,
566  PrecisionType* volData,
567  PrecisionType* rotation,
568  int steps,
569  int4* zshparams,
570  PrecisionType3* clnm,
571  int* Vmask,
572  PrecisionType* projectionPlane,
573  KernelOutputs* outputs
574  )
575 {
576  outputs->sumVD = 0.0;
577  outputs->modg = 0.0;
578  outputs->count = 0.0;
579 
580  PrecisionType pos[3];
581 
582  /*
583  std::cout
584  << "clnm: " << clnm[0].x << "," << clnm[1].x << "," << clnm[2].x << "\n"
585  << "Rmax2: " << Rmax2 << "\n"
586  << "iRmax: " << iRmax << "\n"
587  << "Rotation: " << rotation[0] << "," << rotation[1] << "," << rotation[5] << "\n"
588  << "Volume: " << ELEM_3D_SHIFTED(volData, volMeta, 0, 0, 0) << "," << ELEM_3D_SHIFTED(volData, volMeta, 0, 0, 1) << "," << ELEM_3D_SHIFTED(volData, volMeta, 0, 1, 2) << "\n"
589  << "" << std::endl;
590  */
591 
592  for (int k = P2L_Z_IDX(volMeta, 0); k < P2L_Z_IDX(volMeta, volMeta.zDim); k++) {
593  for (int i = P2L_Y_IDX(volMeta, 0); i < P2L_Y_IDX(volMeta, volMeta.yDim); i++) {
594  for (int j = P2L_X_IDX(volMeta, 0); j < P2L_X_IDX(volMeta, volMeta.xDim); j++) {
595  pos[2] = k; pos[1] = i; pos[0] = j;
596  rotate(pos, rotation);
597  //if (k == 10 && i == 10 && j == 10)
598  // std::cout << "pos("<<pos[0]<<","<<pos[1]<<","<<pos[2]<<")" << std::endl;
599  double gx = 0.0, gy = 0.0, gz = 0.0;
600  double k2= pos[2] * pos[2];
601  double kr= pos[2] * iRmax;
602  double k2i2 =k2 + pos[1] * pos[1];
603  double ir= pos[1] * iRmax;
604  double r2= k2i2 + pos[0] * pos[0];
605  double jr= pos[0] * iRmax;
606  double rr = sqrt(r2) * iRmax;
607 
608  if (r2 < Rmax2) {
609  for (int idx = 0; idx < steps; idx++) {
610  int l1 = zshparams[idx].w;
611  int n = zshparams[idx].x;
612  int l2 = zshparams[idx].y;
613  int m = zshparams[idx].z;
614  PrecisionType zsph = ::ZernikeSphericalHarmonics(l1,n,l2,m,jr,ir,kr,rr);
615  if (rr>0 || l2==0) {
616  gx += zsph * clnm[idx].x;
617  gy += zsph * clnm[idx].y;
618  gz += zsph * clnm[idx].z;
619  }
620  }
621 
622  int k_mask, i_mask, j_mask;
623  int voxelI_mask;
624  k_mask = (int)(pos[2] + gz);
625  i_mask = (int)(pos[1] + gy);
626  j_mask = (int)(pos[0] + gx);
627 
628  if (IS_OUTSIDE(volMeta, k_mask, i_mask, j_mask)) {
629  voxelI_mask = 0;
630  } else {
631  voxelI_mask = ELEM_3D_SHIFTED(Vmask, volMeta, k_mask, i_mask, j_mask);
632  }
633 
634  if (voxelI_mask == 1) {
635  double voxelI=interpolatedElement3DCpu(volData, volMeta,
636  pos[0] + gx, pos[1] + gy, pos[2] + gz);
637  ELEM_2D_SHIFTED(projectionPlane, volMeta, i, j) += voxelI;
638  outputs->sumVD += voxelI;
639  outputs->modg += gx*gx + gy*gy + gz*gz;
640  outputs->count++;
641  }
642  }
643  }// inner for
644  }
645  }
646 }
647 
648 } // namespace AngularAlignmentGpu
#define A2D_ELEM(v, i, j)
#define VEC_ELEM(v, i)
Definition: matrix1d.h:245
void rotate(PrecisionType *pos, PrecisionType *rotation)
#define FINISHINGX(v)
void _processCudaError(const char *file, int line)
size_t size() const
Definition: matrix1d.h:508
void sqrt(Image< double > &op)
static double * y
T * mdata
Definition: matrix2d.h:395
cudaError cudaMallocAndCopy(T **target, const T *source, size_t numberOfElements, size_t memSize=0)
#define z0
#define FINISHINGZ(v)
#define STARTINGX(v)
doublereal * x
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
#define STARTINGY(v)
#define A3D_ELEM(V, k, i, j)
ProgTransformDimRed * prog
MultidimArray< T > data
Definition: xmipp_image.h:55
#define processCudaError()
void setupImageNew(Image< double > &inputImage, PrecisionType **outputImageData)
#define BLOCK_X_DIM
#define FLOOR(x)
Definition: xmipp_macros.h:240
#define y0
#define x0
#define XX(v)
Definition: matrix1d.h:85
void associateWith(ProgAngularSphAlignmentGpu *prog)
void fakeKernel(PrecisionType Rmax2, PrecisionType iRmax, ImageMetaData volMeta, PrecisionType *volData, PrecisionType *rotation, int steps, int4 *zshparams, PrecisionType3 *clnm, int *Vmask, PrecisionType *projectionPlane, KernelOutputs *outputs)
#define BLOCK_Y_DIM
void runKernelTest(Matrix1D< double > &clnm, size_t idxY0, double RmaxF2, double iRmaxF, Matrix2D< double > R, const MultidimArray< double > &mV, Matrix1D< double > &steps_cp, Matrix1D< int > &vL1, Matrix1D< int > &vN, Matrix1D< int > &vL2, Matrix1D< int > &vM, MultidimArray< int > &V_mask, MultidimArray< double > &mP)
double z
T interpolatedElement3D(double x, double y, double z, T outside_value=(T) 0) const
PrecisionType interpolatedElement3DCpu(PrecisionType *data, ImageMetaData ImD, PrecisionType x, PrecisionType y, PrecisionType z, PrecisionType outside_value=0)
#define BLOCK_Z_DIM
void initZeros()
Definition: matrix1d.h:592
#define j
#define YY(v)
Definition: matrix1d.h:93
int m
#define FINISHINGY(v)
bool outside(int k, int i, int j) const
double ZernikeSphericalHarmonics(int n, int m, double xr, double yr, double zr, double r)
void transformData(Target **dest, Source *source, size_t n, bool mallocMem=true)
size_t mdim
Definition: matrix2d.h:416
fprintf(glob_prnt.io, "\)
float r2
#define LIN_INTERP(a, l, h)
Definition: xmipp_macros.h:381
#define STARTINGZ(v)
int * n
#define ZZ(v)
Definition: matrix1d.h:101
int ir