Xmipp  v3.23.11-Nereus
Classes | Functions
Cuda GPU Correlation
Collaboration diagram for Cuda GPU Correlation:

Classes

class  StructuresAux
 
class  GpuCorrelationAux
 

Functions

void cuda_calculate_correlation (GpuCorrelationAux &referenceAux, GpuCorrelationAux &experimentalAux, TransformMatrix< float > &transMat, float *max_vector, int maxShift, mycufftHandle &myhandlePadded, bool mirror, StructuresAux &myStructureAux, myStreamHandle &myStream, TransformMatrix< float > &resultTR, bool saveMaxVector)
 
void cuda_calculate_correlation_rotation (GpuCorrelationAux &referenceAux, GpuCorrelationAux &experimentalAux, TransformMatrix< float > &transMat, float *max_vector, int maxShift, mycufftHandle &myhandlePadded, bool mirror, StructuresAux &myStructureAux, myStreamHandle &myStream, TransformMatrix< float > &resultRT)
 
void apply_transform (GpuMultidimArrayAtGpu< float > &d_original_image, GpuMultidimArrayAtGpu< float > &d_transform_image, TransformMatrix< float > &transMat, myStreamHandle &myStream)
 
void padding_masking (GpuMultidimArrayAtGpu< float > &d_orig_image, GpuMultidimArrayAtGpu< float > &mask, GpuMultidimArrayAtGpu< float > &padded_image_gpu, GpuMultidimArrayAtGpu< float > &padded_image2_gpu, GpuMultidimArrayAtGpu< float > &padded_mask_gpu, bool experimental, myStreamHandle &myStream)
 
void cuda_calculate_correlation_two (GpuCorrelationAux &referenceAux, GpuCorrelationAux &experimentalAuxTR, TransformMatrix< float > &transMatTR, float *max_vectorTR, int maxShift, mycufftHandle &myhandlePaddedTR, bool mirror, StructuresAux &myStructureAuxTR, myStreamHandle &myStreamTR, GpuCorrelationAux &experimentalAuxRT, TransformMatrix< float > &transMatRT, float *max_vectorRT, mycufftHandle &myhandlePaddedRT, StructuresAux &myStructureAuxRT, myStreamHandle &myStreamRT, TransformMatrix< float > &resultTR, TransformMatrix< float > &resultRT, mycufftHandle &ifftcb, bool saveMaxVector)
 

Detailed Description

Function Documentation

◆ apply_transform()

void apply_transform ( GpuMultidimArrayAtGpu< float > &  d_original_image,
GpuMultidimArrayAtGpu< float > &  d_transform_image,
TransformMatrix< float > &  transMat,
myStreamHandle myStream 
)

Definition at line 1447 of file cuda_gpu_correlation.cpp.

1448  {
1449 
1450  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
1451 
1452  int numTh = 1024;
1453 
1454  int numBlk = d_transform_image.yxdim/numTh;
1455  if(d_transform_image.yxdim%numTh > 0)
1456  numBlk++;
1457  dim3 blockSize(numTh, 1, 1);
1458  dim3 gridSize(numBlk, d_transform_image.Ndim, 1);
1459 
1460  bool power2yx, power2x;
1461  if (d_original_image.yxdim & (d_original_image.yxdim-1))
1462  power2yx = false;
1463  else
1464  power2yx = true;
1465  if (d_original_image.Xdim & (d_original_image.Xdim-1))
1466  power2x = false;
1467  else
1468  power2x = true;
1469  applyTransformKernel<<< gridSize, blockSize, 9*sizeof(float), *stream >>>
1470  (d_original_image.d_data, d_transform_image.d_data, transMat.d_data,
1471  d_original_image.nzyxdim, d_original_image.yxdim, d_original_image.Xdim,
1472  d_original_image.Ydim, power2yx, power2x);
1473 
1474 }

◆ cuda_calculate_correlation()

void cuda_calculate_correlation ( GpuCorrelationAux referenceAux,
GpuCorrelationAux experimentalAux,
TransformMatrix< float > &  transMat,
float *  max_vector,
int  maxShift,
mycufftHandle myhandlePadded,
bool  mirror,
StructuresAux myStructureAux,
myStreamHandle myStream,
TransformMatrix< float > &  resultTR,
bool  saveMaxVector 
)

Definition at line 1205 of file cuda_gpu_correlation.cpp.

1209 {
1210 
1211  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
1212 
1213  myStructureAux.RefExpFourier.resize(referenceAux.d_projFFT.Xdim, referenceAux.d_projFFT.Ydim,
1214  referenceAux.d_projFFT.Zdim, referenceAux.d_projFFT.Ndim);
1215 
1216  int numTh = 1024;
1217  XmippDim3 blockSize(numTh, 1, 1), gridSize;
1218  referenceAux.d_projFFT.calculateGridSizeVectorized(blockSize, gridSize);
1219 
1220  pointwiseMultiplicationComplexKernel<<< CONVERT2DIM3(gridSize), CONVERT2DIM3(blockSize), 0, *stream >>>
1221  ((cufftComplex*)referenceAux.d_projFFT.d_data, (cufftComplex*)experimentalAux.d_projFFT.d_data, (cufftComplex*)myStructureAux.RefExpFourier.d_data,
1222  referenceAux.d_projFFT.nzyxdim, referenceAux.d_projFFT.yxdim);
1223 
1224 
1225  myStructureAux.RefExpRealSpace.resize(referenceAux.Xdim, referenceAux.Ydim, referenceAux.d_projFFT.Zdim,
1226  referenceAux.d_projFFT.Ndim);
1227 
1229  myStructureAux.RefExpFourier.ifftStream(myStructureAux.RefExpRealSpace, myhandlePadded, myStream, false, dull);
1230 
1231 
1232  XmippDim3 blockSize2(numTh, 1, 1), gridSize2;
1233  myStructureAux.RefExpRealSpace.calculateGridSizeVectorized(blockSize2, gridSize2);
1234 
1235  myStructureAux.d_NCC.resize(referenceAux.Xdim, referenceAux.Ydim, referenceAux.d_projFFT.Zdim,
1236  referenceAux.d_projFFT.Ndim);
1237 
1238  bool power2yx, power2x;
1239  if (referenceAux.MFrealSpace.yxdim & (referenceAux.MFrealSpace.yxdim-1))
1240  power2yx = false;
1241  else
1242  power2yx = true;
1243  if (referenceAux.MFrealSpace.Xdim & (referenceAux.MFrealSpace.Xdim-1))
1244  power2x = false;
1245  else
1246  power2x = true;
1247  calculateNccKernel<<< CONVERT2DIM3(gridSize2), CONVERT2DIM3(blockSize2), 0, *stream >>>
1248  (myStructureAux.RefExpRealSpace.d_data, referenceAux.MFrealSpace.d_data, experimentalAux.MFrealSpace.d_data, referenceAux.MF2realSpace.d_data,
1249  experimentalAux.MF2realSpace.d_data, referenceAux.maskAutocorrelation.d_data, myStructureAux.d_NCC.d_data, referenceAux.MFrealSpace.nzyxdim,
1250  referenceAux.MFrealSpace.yxdim, referenceAux.MFrealSpace.Xdim, referenceAux.MFrealSpace.Ydim, referenceAux.maskCount, maxShift, power2yx, power2x);
1251 
1252  int fixPadding=0;
1253  if(referenceAux.XdimOrig%2==0 && referenceAux.Xdim%2==0)
1254  fixPadding=1;
1255  if(referenceAux.XdimOrig%2==0 && referenceAux.Xdim%2!=0)
1256  fixPadding=0;
1257  if(referenceAux.XdimOrig%2!=0 && referenceAux.Xdim%2==0)
1258  fixPadding=-1;
1259  if(referenceAux.XdimOrig%2!=0 && referenceAux.Xdim%2!=0)
1260  fixPadding=0;
1261 
1262  calculateMaxNew2DNew(myStructureAux.d_NCC.yxdim, myStructureAux.d_NCC.Ndim,
1263  myStructureAux.d_NCC.d_data, myStructureAux.d_out_max, myStructureAux.d_pos_max, myStream);
1264 
1265  numTh = 1024;
1266  int numBlk = transMat.Ndim/numTh;
1267  if(transMat.Ndim%numTh > 0)
1268  numBlk++;
1269 
1270  bool _power2x;
1271  if (myStructureAux.d_NCC.Xdim & (myStructureAux.d_NCC.Xdim-1))
1272  _power2x = false;
1273  else
1274  _power2x = true;
1275  double maxShift2 = (2*maxShift)*(2*maxShift);
1276  buildTranslationMatrix<<<numBlk, numTh, 0, *stream>>> (myStructureAux.d_pos_max.d_data, transMat.d_data, resultTR.d_data,
1277  myStructureAux.d_out_max.d_data, myStructureAux.d_NCC.d_data, myStructureAux.d_NCC.Xdim, myStructureAux.d_NCC.Ydim,
1278  myStructureAux.d_NCC.Ndim, myStructureAux.d_NCC.yxdim, fixPadding, maxShift2, _power2x);
1279 
1280  resultTR.copyMatrix(transMat, myStream);
1281 
1282  if(saveMaxVector)
1283  gpuErrchk(cudaMemcpyAsync(max_vector, myStructureAux.d_out_max.d_data, myStructureAux.d_NCC.Ndim*sizeof(float), cudaMemcpyDeviceToHost, *stream));
1284 
1285 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
void resize(const GpuMultidimArrayAtGpu< T1 > &array)
GpuMultidimArrayAtGpu< float > RefExpRealSpace
void calculateMaxNew2DNew(int yxdim, int Ndim, float *d_data, GpuMultidimArrayAtGpu< float > &d_out, GpuMultidimArrayAtGpu< float > &d_pos, myStreamHandle &myStream)
GpuMultidimArrayAtGpu< std::complex< float > > RefExpFourier
GpuMultidimArrayAtGpu< float > maskAutocorrelation
GpuMultidimArrayAtGpu< std::complex< float > > d_projFFT
void ifftStream(GpuMultidimArrayAtGpu< T1 > &realSpace, mycufftHandle &myhandle, myStreamHandle &myStream, bool useCallback, GpuMultidimArrayAtGpu< std::complex< float > > &dataExp)
GpuMultidimArrayAtGpu< float > d_out_max
void calculateGridSizeVectorized(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
GpuMultidimArrayAtGpu< float > d_pos_max
GpuMultidimArrayAtGpu< float > MF2realSpace
GpuMultidimArrayAtGpu< float > MFrealSpace
GpuMultidimArrayAtGpu< float > d_NCC
void copyMatrix(TransformMatrix< float > &lastMatrix, myStreamHandle &myStream)

◆ cuda_calculate_correlation_rotation()

void cuda_calculate_correlation_rotation ( GpuCorrelationAux referenceAux,
GpuCorrelationAux experimentalAux,
TransformMatrix< float > &  transMat,
float *  max_vector,
int  maxShift,
mycufftHandle myhandlePadded,
bool  mirror,
StructuresAux myStructureAux,
myStreamHandle myStream,
TransformMatrix< float > &  resultRT 
)

Definition at line 1127 of file cuda_gpu_correlation.cpp.

1130 {
1131 
1132 
1133  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
1134 
1135  myStructureAux.RefExpFourierPolar.resize(referenceAux.d_projPolarFFT.Xdim, referenceAux.d_projPolarFFT.Ydim,
1136  referenceAux.d_projPolarFFT.Zdim, referenceAux.d_projPolarFFT.Ndim);
1137 
1138  int numTh = 1024;
1139  XmippDim3 blockSize(numTh, 1, 1), gridSize;
1140  referenceAux.d_projPolarFFT.calculateGridSizeVectorized(blockSize, gridSize);
1141 
1142  pointwiseMultiplicationComplexKernel<<< CONVERT2DIM3(gridSize), CONVERT2DIM3(blockSize), 0, *stream >>>
1143  ((cufftComplex*)referenceAux.d_projPolarFFT.d_data, (cufftComplex*)experimentalAux.d_projPolarFFT.d_data,
1144  (cufftComplex*)myStructureAux.RefExpFourierPolar.d_data, referenceAux.d_projPolarFFT.nzyxdim,
1145  referenceAux.d_projPolarFFT.yxdim);
1146 
1148  myStructureAux.RefExpRealSpacePolar.resize(referenceAux.XdimPolar, referenceAux.YdimPolar, referenceAux.d_projPolarFFT.Zdim,
1149  referenceAux.d_projPolarFFT.Ndim);
1150  myStructureAux.RefExpFourierPolar.ifftStream(myStructureAux.RefExpRealSpacePolar, myhandlePadded, myStream, false, dull);
1151 
1152  XmippDim3 blockSize2(numTh, 1, 1), gridSize2;
1153  myStructureAux.RefExpRealSpacePolar.calculateGridSizeVectorized(blockSize2, gridSize2);
1154 
1155  myStructureAux.d_NCCPolar.resize(referenceAux.XdimPolar, referenceAux.YdimPolar, referenceAux.d_projPolarFFT.Zdim,
1156  referenceAux.d_projPolarFFT.Ndim);
1157 
1158  double maskFFTPolar = (referenceAux.XdimPolar*referenceAux.YdimPolar);
1159  calculateNccRotationKernel<<< CONVERT2DIM3(gridSize2), CONVERT2DIM3(blockSize2), 0, *stream >>>
1160  (myStructureAux.RefExpRealSpacePolar.d_data, (cufftComplex*)referenceAux.d_projPolarFFT.d_data, (cufftComplex*)experimentalAux.d_projPolarFFT.d_data,
1161  (cufftComplex*)referenceAux.d_projPolarSquaredFFT.d_data, (cufftComplex*)experimentalAux.d_projPolarSquaredFFT.d_data,
1162  maskFFTPolar, myStructureAux.d_NCCPolar.d_data, referenceAux.d_projPolarFFT.yxdim, myStructureAux.RefExpRealSpacePolar.nzyxdim,
1163  myStructureAux.RefExpRealSpacePolar.yxdim);
1164 
1165  //AJ sum along the radius
1166  numTh = 1024;
1167  int numBlk = (myStructureAux.d_NCCPolar.Xdim*myStructureAux.d_NCCPolar.Ndim)/numTh;
1168  if((myStructureAux.d_NCCPolar.Xdim*myStructureAux.d_NCCPolar.Ndim)%numTh!=0)
1169  numBlk++;
1170 
1171  myStructureAux.d_NCCPolar1D.resize(myStructureAux.d_NCCPolar.Xdim,1,1,myStructureAux.d_NCCPolar.Ndim);
1172  myStructureAux.auxMax.resize(myStructureAux.d_NCCPolar.Xdim,1,1,myStructureAux.d_NCCPolar.Ndim);
1173  myStructureAux.auxZero.resize(myStructureAux.d_NCCPolar.Xdim,1,1,myStructureAux.d_NCCPolar.Ndim);
1174  sumRadiusKernel<<< numBlk, numTh, 0, *stream >>>(myStructureAux.d_NCCPolar.d_data, myStructureAux.d_NCCPolar1D.d_data, myStructureAux.auxMax.d_data,
1175  myStructureAux.auxZero.d_data, myStructureAux.d_NCCPolar.Xdim*myStructureAux.d_NCCPolar.Ndim, myStructureAux.d_NCCPolar.Ydim,
1176  myStructureAux.d_NCCPolar.Ndim);
1177 
1178  calculateMaxNew2DNew(myStructureAux.d_NCCPolar1D.Xdim, myStructureAux.d_NCCPolar1D.Ndim, myStructureAux.d_NCCPolar1D.d_data,
1179  myStructureAux.d_out_polar_max, myStructureAux.d_pos_polar_max, myStream);
1180 
1181  numTh = 1024;
1182  numBlk = transMat.Ndim/numTh;
1183  if(transMat.Ndim%numTh > 0)
1184  numBlk++;
1185 
1186  bool _power2x;
1187  if (myStructureAux.d_NCCPolar1D.Xdim & (myStructureAux.d_NCCPolar1D.Xdim-1))
1188  _power2x = false;
1189  else
1190  _power2x = true;
1191  double maxShift2 = (2*maxShift)*(2*maxShift);
1192  myStructureAux.maxGpu.resize(myStructureAux.d_NCCPolar1D.Ndim);
1193  buildRotationMatrix<<<numBlk, numTh, 0, *stream>>> (myStructureAux.d_pos_polar_max.d_data, transMat.d_data,
1194  resultRT.d_data, myStructureAux.maxGpu.d_data, myStructureAux.auxMax.d_data, myStructureAux.auxZero.d_data,
1195  myStructureAux.d_NCCPolar1D.Xdim, myStructureAux.d_NCCPolar1D.Ndim,
1196  myStructureAux.d_NCCPolar1D.yxdim, 0, maxShift2, _power2x);
1197 
1198  resultRT.copyMatrix(transMat, myStream);
1199 
1200  gpuErrchk(cudaMemcpyAsync(max_vector, myStructureAux.maxGpu.d_data, myStructureAux.maxGpu.Ndim*sizeof(float), cudaMemcpyDeviceToHost, *stream));
1201 
1202 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
void resize(const GpuMultidimArrayAtGpu< T1 > &array)
GpuMultidimArrayAtGpu< float > RefExpRealSpacePolar
GpuMultidimArrayAtGpu< std::complex< float > > d_projPolarSquaredFFT
GpuMultidimArrayAtGpu< float > d_pos_polar_max
GpuMultidimArrayAtGpu< float > auxMax
void calculateMaxNew2DNew(int yxdim, int Ndim, float *d_data, GpuMultidimArrayAtGpu< float > &d_out, GpuMultidimArrayAtGpu< float > &d_pos, myStreamHandle &myStream)
void ifftStream(GpuMultidimArrayAtGpu< T1 > &realSpace, mycufftHandle &myhandle, myStreamHandle &myStream, bool useCallback, GpuMultidimArrayAtGpu< std::complex< float > > &dataExp)
GpuMultidimArrayAtGpu< std::complex< float > > RefExpFourierPolar
GpuMultidimArrayAtGpu< std::complex< float > > d_projPolarFFT
GpuMultidimArrayAtGpu< float > d_NCCPolar
void calculateGridSizeVectorized(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
GpuMultidimArrayAtGpu< float > d_NCCPolar1D
GpuMultidimArrayAtGpu< float > maxGpu
void copyMatrix(TransformMatrix< float > &lastMatrix, myStreamHandle &myStream)
GpuMultidimArrayAtGpu< float > auxZero
GpuMultidimArrayAtGpu< float > d_out_polar_max

◆ cuda_calculate_correlation_two()

void cuda_calculate_correlation_two ( GpuCorrelationAux referenceAux,
GpuCorrelationAux experimentalAuxTR,
TransformMatrix< float > &  transMatTR,
float *  max_vectorTR,
int  maxShift,
mycufftHandle myhandlePaddedTR,
bool  mirror,
StructuresAux myStructureAuxTR,
myStreamHandle myStreamTR,
GpuCorrelationAux experimentalAuxRT,
TransformMatrix< float > &  transMatRT,
float *  max_vectorRT,
mycufftHandle myhandlePaddedRT,
StructuresAux myStructureAuxRT,
myStreamHandle myStreamRT,
TransformMatrix< float > &  resultTR,
TransformMatrix< float > &  resultRT,
mycufftHandle ifftcb,
bool  saveMaxVector 
)

Definition at line 1288 of file cuda_gpu_correlation.cpp.

1297 {
1298 
1299  cudaStream_t *streamTR = (cudaStream_t*) myStreamTR.ptr;
1300  cudaStream_t *streamRT = (cudaStream_t*) myStreamRT.ptr;
1301 
1302 
1303  myStructureAuxTR.RefExpFourier.resize(referenceAux.d_projFFT.Xdim, referenceAux.d_projFFT.Ydim,
1304  referenceAux.d_projFFT.Zdim, referenceAux.d_projFFT.Ndim);
1305  myStructureAuxTR.RefExpRealSpace.resize(referenceAux.Xdim, referenceAux.Ydim, referenceAux.d_projFFT.Zdim,
1306  referenceAux.d_projFFT.Ndim);
1307  myStructureAuxTR.d_NCC.resize(referenceAux.Xdim, referenceAux.Ydim, referenceAux.d_projFFT.Zdim,
1308  referenceAux.d_projFFT.Ndim);
1309 
1310  myStructureAuxRT.RefExpFourierPolar.resize(referenceAux.d_projPolarFFT.Xdim, referenceAux.d_projPolarFFT.Ydim,
1311  referenceAux.d_projPolarFFT.Zdim, referenceAux.d_projPolarFFT.Ndim);
1312  myStructureAuxRT.RefExpRealSpacePolar.resize(referenceAux.XdimPolar, referenceAux.YdimPolar, referenceAux.d_projPolarFFT.Zdim,
1313  referenceAux.d_projPolarFFT.Ndim);
1314  myStructureAuxRT.d_NCCPolar.resize(referenceAux.XdimPolar, referenceAux.YdimPolar, referenceAux.d_projPolarFFT.Zdim,
1315  referenceAux.d_projPolarFFT.Ndim);
1316  myStructureAuxRT.d_NCCPolar1D.resize(myStructureAuxRT.d_NCCPolar.Xdim,1,1,myStructureAuxRT.d_NCCPolar.Ndim);
1317  myStructureAuxRT.auxMax.resize(myStructureAuxRT.d_NCCPolar.Xdim,1,1,myStructureAuxRT.d_NCCPolar.Ndim);
1318  myStructureAuxRT.auxZero.resize(myStructureAuxRT.d_NCCPolar.Xdim,1,1,myStructureAuxRT.d_NCCPolar.Ndim);
1319  myStructureAuxRT.maxGpu.resize(myStructureAuxRT.d_NCCPolar1D.Ndim);
1320 
1321 
1322 
1323  int numTh = 1024;
1324  XmippDim3 blockSize(numTh, 1, 1), gridSize;
1325  referenceAux.d_projFFT.calculateGridSizeVectorized(blockSize, gridSize);
1326 
1327 
1328  pointwiseMultiplicationComplexKernel<<< CONVERT2DIM3(gridSize), CONVERT2DIM3(blockSize), 0, *streamTR >>>
1329  ((cufftComplex*)referenceAux.d_projFFT.d_data, (cufftComplex*)experimentalAuxTR.d_projFFT.d_data,
1330  (cufftComplex*)myStructureAuxTR.RefExpFourier.d_data,
1331  referenceAux.d_projFFT.nzyxdim, referenceAux.d_projFFT.yxdim);
1332 
1333  XmippDim3 blockSize3(numTh, 1, 1), gridSize3;
1334  referenceAux.d_projPolarFFT.calculateGridSizeVectorized(blockSize3, gridSize3);
1335 
1336 
1337  pointwiseMultiplicationComplexKernel<<< CONVERT2DIM3(gridSize3), CONVERT2DIM3(blockSize3), 0, *streamRT >>>
1338  ((cufftComplex*)referenceAux.d_projPolarFFT.d_data, (cufftComplex*)experimentalAuxRT.d_projPolarFFT.d_data,
1339  (cufftComplex*)myStructureAuxRT.RefExpFourierPolar.d_data, referenceAux.d_projPolarFFT.nzyxdim,
1340  referenceAux.d_projPolarFFT.yxdim);
1341 
1342 
1344  myStructureAuxTR.RefExpFourier.ifftStream(myStructureAuxTR.RefExpRealSpace, myhandlePaddedTR, myStreamTR, false, dull);
1345 
1346  myStructureAuxRT.RefExpFourierPolar.ifftStream(myStructureAuxRT.RefExpRealSpacePolar, myhandlePaddedRT, myStreamRT, false, dull);
1347 
1348  XmippDim3 blockSize2(numTh, 1, 1), gridSize2;
1349  myStructureAuxTR.RefExpRealSpace.calculateGridSizeVectorized(blockSize2, gridSize2);
1350 
1351  bool power2yx, power2x;
1352  if (referenceAux.MFrealSpace.yxdim & (referenceAux.MFrealSpace.yxdim-1))
1353  power2yx = false;
1354  else
1355  power2yx = true;
1356  if (referenceAux.MFrealSpace.Xdim & (referenceAux.MFrealSpace.Xdim-1))
1357  power2x = false;
1358  else
1359  power2x = true;
1360  calculateNccKernel<<< CONVERT2DIM3(gridSize2), CONVERT2DIM3(blockSize2), 0, *streamTR >>>
1361  (myStructureAuxTR.RefExpRealSpace.d_data, referenceAux.MFrealSpace.d_data, experimentalAuxTR.MFrealSpace.d_data, referenceAux.MF2realSpace.d_data,
1362  experimentalAuxTR.MF2realSpace.d_data, referenceAux.maskAutocorrelation.d_data, myStructureAuxTR.d_NCC.d_data, referenceAux.MFrealSpace.nzyxdim,
1363  referenceAux.MFrealSpace.yxdim, referenceAux.MFrealSpace.Xdim, referenceAux.MFrealSpace.Ydim, referenceAux.maskCount, maxShift, power2yx, power2x);
1364 
1365 
1366  int fixPadding=0;
1367  if(referenceAux.XdimOrig%2==0 && referenceAux.Xdim%2==0)
1368  fixPadding=1;
1369  if(referenceAux.XdimOrig%2==0 && referenceAux.Xdim%2!=0)
1370  fixPadding=0;
1371  if(referenceAux.XdimOrig%2!=0 && referenceAux.Xdim%2==0)
1372  fixPadding=-1;
1373  if(referenceAux.XdimOrig%2!=0 && referenceAux.Xdim%2!=0)
1374  fixPadding=0;
1375 
1376  numTh = 1024;
1377  XmippDim3 blockSize4(numTh, 1, 1), gridSize4;
1378  myStructureAuxRT.RefExpRealSpacePolar.calculateGridSizeVectorized(blockSize4, gridSize4);
1379 
1380  double maskFFTPolar = (referenceAux.XdimPolar*referenceAux.YdimPolar);
1381  calculateNccRotationKernel<<< CONVERT2DIM3(gridSize4), CONVERT2DIM3(blockSize4), 0, *streamRT >>>
1382  (myStructureAuxRT.RefExpRealSpacePolar.d_data, (cufftComplex*)referenceAux.d_projPolarFFT.d_data, (cufftComplex*)experimentalAuxRT.d_projPolarFFT.d_data,
1383  (cufftComplex*)referenceAux.d_projPolarSquaredFFT.d_data, (cufftComplex*)experimentalAuxRT.d_projPolarSquaredFFT.d_data,
1384  maskFFTPolar, myStructureAuxRT.d_NCCPolar.d_data, referenceAux.d_projPolarFFT.yxdim, myStructureAuxRT.RefExpRealSpacePolar.nzyxdim,
1385  myStructureAuxRT.RefExpRealSpacePolar.yxdim);
1386 
1387  //AJ sum along the radius
1388  numTh = 1024;
1389  int numBlk = (myStructureAuxRT.d_NCCPolar.Xdim*myStructureAuxRT.d_NCCPolar.Ndim)/numTh;
1390  if((myStructureAuxRT.d_NCCPolar.Xdim*myStructureAuxRT.d_NCCPolar.Ndim)%numTh!=0)
1391  numBlk++;
1392 
1393  sumRadiusKernel<<< numBlk, numTh, 0, *streamRT >>>(myStructureAuxRT.d_NCCPolar.d_data, myStructureAuxRT.d_NCCPolar1D.d_data, myStructureAuxRT.auxMax.d_data,
1394  myStructureAuxRT.auxZero.d_data, myStructureAuxRT.d_NCCPolar.Xdim*myStructureAuxRT.d_NCCPolar.Ndim, myStructureAuxRT.d_NCCPolar.Ydim,
1395  myStructureAuxRT.d_NCCPolar.Ndim);
1396 
1397  calculateMaxNew2DNew(myStructureAuxTR.d_NCC.yxdim, myStructureAuxTR.d_NCC.Ndim,
1398  myStructureAuxTR.d_NCC.d_data, myStructureAuxTR.d_out_max, myStructureAuxTR.d_pos_max, myStreamTR);
1399 
1400  calculateMaxNew2DNew(myStructureAuxRT.d_NCCPolar1D.Xdim, myStructureAuxRT.d_NCCPolar1D.Ndim, myStructureAuxRT.d_NCCPolar1D.d_data,
1401  myStructureAuxRT.d_out_polar_max, myStructureAuxRT.d_pos_polar_max, myStreamRT);
1402 
1403  numTh = 1024;
1404  numBlk = transMatTR.Ndim/numTh;
1405  if(transMatTR.Ndim%numTh > 0)
1406  numBlk++;
1407 
1408  bool _power2x;
1409  if (myStructureAuxTR.d_NCC.Xdim & (myStructureAuxTR.d_NCC.Xdim-1))
1410  _power2x = false;
1411  else
1412  _power2x = true;
1413  double maxShift2 = (2*maxShift)*(2*maxShift);
1414  buildTranslationMatrix<<<numBlk, numTh, 0, *streamTR>>> (myStructureAuxTR.d_pos_max.d_data, transMatTR.d_data, resultTR.d_data,
1415  myStructureAuxTR.d_out_max.d_data, myStructureAuxTR.d_NCC.d_data, myStructureAuxTR.d_NCC.Xdim, myStructureAuxTR.d_NCC.Ydim,
1416  myStructureAuxTR.d_NCC.Ndim, myStructureAuxTR.d_NCC.yxdim, fixPadding, maxShift2, _power2x);
1417 
1418  numBlk = transMatRT.Ndim/numTh;
1419  if(transMatRT.Ndim%numTh > 0)
1420  numBlk++;
1421 
1422  bool __power2x;
1423  if (myStructureAuxRT.d_NCCPolar1D.Xdim & (myStructureAuxRT.d_NCCPolar1D.Xdim-1))
1424  __power2x = false;
1425  else
1426  __power2x = true;
1427  buildRotationMatrix<<<numBlk, numTh, 0, *streamRT>>> (myStructureAuxRT.d_pos_polar_max.d_data, transMatRT.d_data,
1428  resultRT.d_data, myStructureAuxRT.maxGpu.d_data, myStructureAuxRT.auxMax.d_data, myStructureAuxRT.auxZero.d_data,
1429  myStructureAuxRT.d_NCCPolar1D.Xdim, myStructureAuxRT.d_NCCPolar1D.Ndim,
1430  myStructureAuxRT.d_NCCPolar1D.yxdim, 0, maxShift2, __power2x);
1431 
1432 
1433  resultTR.copyMatrix(transMatTR, myStreamTR);
1434 
1435  resultRT.copyMatrix(transMatRT, myStreamRT);
1436 
1437  if(saveMaxVector){
1438  gpuErrchk(cudaMemcpyAsync(max_vectorTR, myStructureAuxTR.d_out_max.d_data, myStructureAuxTR.d_NCC.Ndim*sizeof(float), cudaMemcpyDeviceToHost, *streamTR));
1439  gpuErrchk(cudaMemcpyAsync(max_vectorRT, myStructureAuxRT.maxGpu.d_data, myStructureAuxRT.maxGpu.Ndim*sizeof(float), cudaMemcpyDeviceToHost, *streamRT));
1440  }
1441 
1442 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31
void resize(const GpuMultidimArrayAtGpu< T1 > &array)
GpuMultidimArrayAtGpu< float > RefExpRealSpacePolar
GpuMultidimArrayAtGpu< std::complex< float > > d_projPolarSquaredFFT
GpuMultidimArrayAtGpu< float > d_pos_polar_max
GpuMultidimArrayAtGpu< float > auxMax
GpuMultidimArrayAtGpu< float > RefExpRealSpace
void calculateMaxNew2DNew(int yxdim, int Ndim, float *d_data, GpuMultidimArrayAtGpu< float > &d_out, GpuMultidimArrayAtGpu< float > &d_pos, myStreamHandle &myStream)
GpuMultidimArrayAtGpu< std::complex< float > > RefExpFourier
GpuMultidimArrayAtGpu< float > maskAutocorrelation
GpuMultidimArrayAtGpu< std::complex< float > > d_projFFT
void ifftStream(GpuMultidimArrayAtGpu< T1 > &realSpace, mycufftHandle &myhandle, myStreamHandle &myStream, bool useCallback, GpuMultidimArrayAtGpu< std::complex< float > > &dataExp)
GpuMultidimArrayAtGpu< std::complex< float > > RefExpFourierPolar
GpuMultidimArrayAtGpu< float > d_out_max
GpuMultidimArrayAtGpu< std::complex< float > > d_projPolarFFT
GpuMultidimArrayAtGpu< float > d_NCCPolar
void calculateGridSizeVectorized(const XmippDim3 &blockSize, XmippDim3 &gridSize) const
GpuMultidimArrayAtGpu< float > d_NCCPolar1D
GpuMultidimArrayAtGpu< float > d_pos_max
GpuMultidimArrayAtGpu< float > MF2realSpace
GpuMultidimArrayAtGpu< float > MFrealSpace
GpuMultidimArrayAtGpu< float > maxGpu
GpuMultidimArrayAtGpu< float > d_NCC
void copyMatrix(TransformMatrix< float > &lastMatrix, myStreamHandle &myStream)
GpuMultidimArrayAtGpu< float > auxZero
GpuMultidimArrayAtGpu< float > d_out_polar_max

◆ padding_masking()

void padding_masking ( GpuMultidimArrayAtGpu< float > &  d_orig_image,
GpuMultidimArrayAtGpu< float > &  mask,
GpuMultidimArrayAtGpu< float > &  padded_image_gpu,
GpuMultidimArrayAtGpu< float > &  padded_image2_gpu,
GpuMultidimArrayAtGpu< float > &  padded_mask_gpu,
bool  experimental,
myStreamHandle myStream 
)

Definition at line 942 of file cuda_gpu_correlation.cpp.

944  {
945 
946  int numTh = 1024;
947  int numBlk = d_orig_image.yxdim/numTh;
948  if(d_orig_image.yxdim%numTh > 0)
949  numBlk++;
950 
951  dim3 blockSize(numTh,1,1);
952  dim3 gridSize(numBlk, d_orig_image.Ndim, 1);
953 
954  cudaStream_t *stream = (cudaStream_t*) myStream.ptr;
955  gpuErrchk(cudaMemsetAsync(padded_image_gpu.d_data, 0, padded_image_gpu.nzyxdim*sizeof(float), *stream));
956  gpuErrchk(cudaMemsetAsync(padded_image2_gpu.d_data, 0, padded_image2_gpu.nzyxdim*sizeof(float), *stream));
957  if(padded_mask_gpu.d_data!=NULL)
958  gpuErrchk(cudaMemsetAsync(padded_mask_gpu.d_data, 0, padded_mask_gpu.nzyxdim*sizeof(float), *stream));
959 
960  bool power2;
961  if (d_orig_image.Xdim & (d_orig_image.Xdim-1))
962  power2 = false;
963  else
964  power2 = true;
965  maskingPaddingKernel<<< gridSize, blockSize, 0, *stream>>>(d_orig_image.d_data, mask.d_data,
966  padded_image_gpu.d_data, padded_image2_gpu.d_data, padded_mask_gpu.d_data,
967  d_orig_image.Xdim, d_orig_image.Ydim, d_orig_image.yxdim, d_orig_image.Ndim,
968  padded_image_gpu.Xdim, padded_image_gpu.Ydim, padded_image_gpu.yxdim, experimental, power2);
969 
970 }
#define gpuErrchk(code)
Definition: cuda_asserts.h:31