1299 cudaStream_t *streamTR = (cudaStream_t*) myStreamTR.
ptr;
1300 cudaStream_t *streamRT = (cudaStream_t*) myStreamRT.
ptr;
1324 XmippDim3 blockSize(numTh, 1, 1), gridSize;
1328 pointwiseMultiplicationComplexKernel<<< CONVERT2DIM3(gridSize), CONVERT2DIM3(blockSize), 0, *streamTR >>>
1333 XmippDim3 blockSize3(numTh, 1, 1), gridSize3;
1337 pointwiseMultiplicationComplexKernel<<< CONVERT2DIM3(gridSize3), CONVERT2DIM3(blockSize3), 0, *streamRT >>>
1348 XmippDim3 blockSize2(numTh, 1, 1), gridSize2;
1351 bool power2yx, power2x;
1360 calculateNccKernel<<< CONVERT2DIM3(gridSize2), CONVERT2DIM3(blockSize2), 0, *streamTR >>>
1367 if(referenceAux.
XdimOrig%2==0 && referenceAux.
Xdim%2==0)
1369 if(referenceAux.
XdimOrig%2==0 && referenceAux.
Xdim%2!=0)
1371 if(referenceAux.
XdimOrig%2!=0 && referenceAux.
Xdim%2==0)
1373 if(referenceAux.
XdimOrig%2!=0 && referenceAux.
Xdim%2!=0)
1377 XmippDim3 blockSize4(numTh, 1, 1), gridSize4;
1381 calculateNccRotationKernel<<< CONVERT2DIM3(gridSize4), CONVERT2DIM3(blockSize4), 0, *streamRT >>>
1404 numBlk = transMatTR.
Ndim/numTh;
1405 if(transMatTR.
Ndim%numTh > 0)
1413 double maxShift2 = (2*maxShift)*(2*maxShift);
1416 myStructureAuxTR.
d_NCC.
Ndim, myStructureAuxTR.
d_NCC.
yxdim, fixPadding, maxShift2, _power2x);
1418 numBlk = transMatRT.
Ndim/numTh;
1419 if(transMatRT.
Ndim%numTh > 0)
1439 gpuErrchk(cudaMemcpyAsync(max_vectorRT, myStructureAuxRT.
maxGpu.
d_data, myStructureAuxRT.
maxGpu.
Ndim*
sizeof(
float), cudaMemcpyDeviceToHost, *streamRT));
void resize(const GpuMultidimArrayAtGpu< T1 > &array)
GpuMultidimArrayAtGpu< float > RefExpRealSpacePolar
GpuMultidimArrayAtGpu< std::complex< float > > d_projPolarSquaredFFT
GpuMultidimArrayAtGpu< float > d_pos_polar_max
GpuMultidimArrayAtGpu< float > auxMax
GpuMultidimArrayAtGpu< float > RefExpRealSpace
void calculateMaxNew2DNew(int yxdim, int Ndim, float *d_data, GpuMultidimArrayAtGpu< float > &d_out, GpuMultidimArrayAtGpu< float > &d_pos, myStreamHandle &myStream)
GpuMultidimArrayAtGpu< std::complex< float > > RefExpFourier
GpuMultidimArrayAtGpu< float > maskAutocorrelation
GpuMultidimArrayAtGpu< std::complex< float > > d_projFFT
void ifftStream(GpuMultidimArrayAtGpu< T1 > &realSpace, mycufftHandle &myhandle, myStreamHandle &myStream, bool useCallback, GpuMultidimArrayAtGpu< std::complex< float > > &dataExp)
GpuMultidimArrayAtGpu< std::complex< float > > RefExpFourierPolar
GpuMultidimArrayAtGpu< float > d_out_max
GpuMultidimArrayAtGpu< std::complex< float > > d_projPolarFFT
GpuMultidimArrayAtGpu< float > d_NCCPolar
void calculateGridSizeVectorized(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
GpuMultidimArrayAtGpu< float > d_NCCPolar1D
GpuMultidimArrayAtGpu< float > d_pos_max
GpuMultidimArrayAtGpu< float > MF2realSpace
GpuMultidimArrayAtGpu< float > MFrealSpace
GpuMultidimArrayAtGpu< float > maxGpu
GpuMultidimArrayAtGpu< float > d_NCC
GpuMultidimArrayAtGpu< float > auxZero
GpuMultidimArrayAtGpu< float > d_out_polar_max